Merge branch 'next/dt' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/linux...
authorLinus Torvalds <torvalds@linux-foundation.org>
Sat, 30 Jul 2011 06:32:02 +0000 (23:32 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sat, 30 Jul 2011 06:32:02 +0000 (23:32 -0700)
* 'next/dt' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/linux-arm-soc: (21 commits)
  arm/dt: tegra devicetree support
  arm/versatile: Add device tree support
  dt/irq: add irq_domain_generate_simple() helper
  irq: add irq_domain translation infrastructure
  dmaengine: imx-sdma: add device tree probe support
  dmaengine: imx-sdma: sdma_get_firmware does not need to copy fw_name
  dmaengine: imx-sdma: use platform_device_id to identify sdma version
  mmc: sdhci-esdhc-imx: add device tree probe support
  mmc: sdhci-pltfm: dt device does not pass parent to sdhci_alloc_host
  mmc: sdhci-esdhc-imx: get rid of the uses of cpu_is_mx()
  mmc: sdhci-esdhc-imx: do not reference platform data after probe
  mmc: sdhci-esdhc-imx: extend card_detect and write_protect support for mx5
  net/fec: add device tree probe support
  net: ibm_newemac: convert it to use of_get_phy_mode
  dt/net: add helper function of_get_phy_mode
  net/fec: gasket needs to be enabled for some i.mx
  serial/imx: add device tree probe support
  serial/imx: get rid of the uses of cpu_is_mx1()
  arm/dt: Add dtb make rule
  arm/dt: Add skeleton dtsi file
  ...

365 files changed:
Documentation/devicetree/bindings/gpio/gpio_keys.txt [new file with mode: 0644]
Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt [new file with mode: 0644]
Documentation/devicetree/bindings/watchdog/samsung-wdt.txt [new file with mode: 0644]
Documentation/filesystems/nfs/Exporting
Documentation/md.txt
Documentation/security/keys-ecryptfs.txt [new file with mode: 0644]
Documentation/security/keys-trusted-encrypted.txt
Documentation/watchdog/00-INDEX
Documentation/watchdog/watchdog-kernel-api.txt [new file with mode: 0644]
MAINTAINERS
arch/arm/mach-tegra/Kconfig
arch/arm/mach-zynq/Makefile
arch/arm/mach-zynq/board_dt.c [deleted file]
arch/microblaze/include/asm/cpuinfo.h
arch/microblaze/include/asm/irqflags.h
arch/microblaze/include/asm/processor.h
arch/microblaze/include/asm/prom.h
arch/microblaze/include/asm/pvr.h
arch/microblaze/include/asm/setup.h
arch/microblaze/kernel/cpu/cpuinfo-pvr-full.c
arch/microblaze/kernel/cpu/cpuinfo-static.c
arch/microblaze/kernel/cpu/cpuinfo.c
arch/microblaze/kernel/cpu/mb.c
arch/microblaze/kernel/early_printk.c
arch/microblaze/kernel/hw_exception_handler.S
arch/microblaze/kernel/intc.c
arch/microblaze/kernel/process.c
arch/microblaze/kernel/prom.c
arch/microblaze/kernel/setup.c
arch/sparc/include/asm/elf_64.h
arch/sparc/include/asm/hypervisor.h
arch/sparc/include/asm/spitfire.h
arch/sparc/include/asm/xor_64.h
arch/sparc/kernel/cpu.c
arch/sparc/kernel/cpumap.c
arch/sparc/kernel/head_64.S
arch/sparc/kernel/hvapi.c
arch/sparc/kernel/pcr.c
arch/sparc/kernel/perf_event.c
arch/sparc/lib/atomic32.c
drivers/char/hw_random/n2-drv.c
drivers/char/hw_random/n2rng.h
drivers/char/tpm/tpm.c
drivers/char/tpm/tpm.h
drivers/char/tpm/tpm_nsc.c
drivers/char/tpm/tpm_tis.c
drivers/crypto/n2_core.c
drivers/infiniband/ulp/iser/iser_initiator.c
drivers/input/joystick/xpad.c
drivers/input/keyboard/adp5588-keys.c
drivers/input/keyboard/adp5589-keys.c
drivers/input/keyboard/atkbd.c
drivers/input/keyboard/gpio_keys.c
drivers/input/keyboard/lm8323.c
drivers/input/keyboard/mpr121_touchkey.c
drivers/input/keyboard/pmic8xxx-keypad.c
drivers/input/keyboard/qt1070.c
drivers/input/keyboard/sh_keysc.c
drivers/input/keyboard/tegra-kbc.c
drivers/input/keyboard/tnetv107x-keypad.c
drivers/input/misc/Kconfig
drivers/input/misc/Makefile
drivers/input/misc/bfin_rotary.c
drivers/input/misc/kxtj9.c [new file with mode: 0644]
drivers/input/misc/mma8450.c [new file with mode: 0644]
drivers/input/misc/mpu3050.c [new file with mode: 0644]
drivers/input/misc/xen-kbdfront.c
drivers/input/mouse/gpio_mouse.c
drivers/input/mouse/lifebook.c
drivers/input/mouse/pxa930_trkball.c
drivers/input/mouse/sentelic.c
drivers/input/mouse/synaptics.c
drivers/input/mouse/synaptics.h
drivers/input/serio/at32psif.c
drivers/input/serio/hp_sdc.c
drivers/input/tablet/aiptek.c
drivers/input/tablet/wacom_wac.c
drivers/input/touchscreen/ads7846.c
drivers/input/touchscreen/atmel-wm97xx.c
drivers/input/touchscreen/atmel_mxt_ts.c
drivers/input/touchscreen/cy8ctmg110_ts.c
drivers/input/touchscreen/intel-mid-touch.c
drivers/input/touchscreen/mainstone-wm97xx.c
drivers/input/touchscreen/tnetv107x-ts.c
drivers/input/touchscreen/wm9705.c
drivers/input/touchscreen/wm9712.c
drivers/input/touchscreen/wm9713.c
drivers/input/touchscreen/zylonite-wm97xx.c
drivers/isdn/i4l/isdn_net.c
drivers/md/bitmap.c
drivers/md/bitmap.h
drivers/md/md.c
drivers/md/md.h
drivers/md/raid1.c
drivers/md/raid1.h
drivers/md/raid10.c
drivers/md/raid10.h
drivers/md/raid5.c
drivers/md/raid5.h
drivers/net/Makefile
drivers/net/acenic.c
drivers/net/acenic.h
drivers/net/bonding/bond_main.c
drivers/net/bonding/bond_sysfs.c
drivers/net/forcedeth.c
drivers/net/gianfar.c
drivers/net/ifb.c
drivers/net/macvlan.c
drivers/net/tg3.c
drivers/net/tg3.h
drivers/net/tun.c
drivers/net/usb/asix.c
drivers/net/veth.c
drivers/net/wan/hdlc_fr.c
drivers/net/wireless/airo.c
drivers/net/wireless/b43/Kconfig
drivers/net/wireless/b43/bus.c
drivers/net/wireless/b43/main.c
drivers/net/wireless/hostap/hostap_main.c
drivers/nfc/pn533.c
drivers/pci/pci-label.c
drivers/scsi/be2iscsi/be_main.h
drivers/scsi/bnx2i/bnx2i_hwi.c
drivers/scsi/bnx2i/bnx2i_iscsi.c
drivers/scsi/libiscsi.c
drivers/staging/ath6kl/os/linux/ar6000_drv.c
drivers/staging/brcm80211/brcmsmac/mac80211_if.h
drivers/target/Kconfig
drivers/target/Makefile
drivers/target/iscsi/Kconfig [new file with mode: 0644]
drivers/target/iscsi/Makefile [new file with mode: 0644]
drivers/target/iscsi/iscsi_target.c [new file with mode: 0644]
drivers/target/iscsi/iscsi_target.h [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_auth.c [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_auth.h [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_configfs.c [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_configfs.h [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_core.h [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_datain_values.c [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_datain_values.h [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_device.c [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_device.h [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_erl0.c [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_erl0.h [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_erl1.c [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_erl1.h [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_erl2.c [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_erl2.h [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_login.c [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_login.h [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_nego.c [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_nego.h [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_nodeattrib.c [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_nodeattrib.h [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_parameters.c [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_parameters.h [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_seq_pdu_list.c [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_seq_pdu_list.h [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_stat.c [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_stat.h [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_tmr.c [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_tmr.h [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_tpg.c [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_tpg.h [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_tq.c [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_tq.h [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_util.c [new file with mode: 0644]
drivers/target/iscsi/iscsi_target_util.h [new file with mode: 0644]
drivers/target/target_core_transport.c
drivers/watchdog/Kconfig
drivers/watchdog/Makefile
drivers/watchdog/at91sam9_wdt.c
drivers/watchdog/at91sam9_wdt.h [moved from arch/arm/mach-at91/include/mach/at91_wdt.h with 96% similarity]
drivers/watchdog/dw_wdt.c [new file with mode: 0644]
drivers/watchdog/hpwdt.c
drivers/watchdog/iTCO_wdt.c
drivers/watchdog/imx2_wdt.c
drivers/watchdog/it8712f_wdt.c
drivers/watchdog/it87_wdt.c
drivers/watchdog/mpcore_wdt.c
drivers/watchdog/mtx-1_wdt.c
drivers/watchdog/of_xilinx_wdt.c [new file with mode: 0644]
drivers/watchdog/pc87413_wdt.c
drivers/watchdog/s3c2410_wdt.c
drivers/watchdog/sch311x_wdt.c
drivers/watchdog/sp805_wdt.c
drivers/watchdog/watchdog_core.c [new file with mode: 0644]
drivers/watchdog/watchdog_dev.c [new file with mode: 0644]
drivers/watchdog/watchdog_dev.h [new file with mode: 0644]
fs/anon_inodes.c
fs/btrfs/btrfs_inode.h
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/delayed-inode.c
fs/btrfs/dir-item.c
fs/btrfs/disk-io.c
fs/btrfs/disk-io.h
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/file-item.c
fs/btrfs/file.c
fs/btrfs/free-space-cache.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/locking.c
fs/btrfs/locking.h
fs/btrfs/relocation.c
fs/btrfs/struct-funcs.c
fs/btrfs/transaction.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/btrfs/xattr.c
fs/dcache.c
fs/ecryptfs/ecryptfs_kernel.h
fs/ecryptfs/keystore.c
fs/ext2/acl.h
fs/gfs2/ops_fstype.c
fs/inode.c
fs/jffs2/fs.c
fs/jfs/jfs_dmap.c
fs/jfs/jfs_txnmgr.c
fs/jfs/namei.c
fs/lockd/clntproc.c
fs/nfs/Kconfig
fs/nfs/callback_proc.c
fs/nfs/client.c
fs/nfs/delegation.c
fs/nfs/internal.h
fs/nfs/namespace.c
fs/nfs/nfs4_fs.h
fs/nfs/nfs4filelayout.c
fs/nfs/nfs4filelayout.h
fs/nfs/nfs4filelayoutdev.c
fs/nfs/nfs4proc.c
fs/nfs/nfs4state.c
fs/nfs/nfs4xdr.c
fs/nfs/objlayout/objio_osd.c
fs/nfs/pagelist.c
fs/nfs/pnfs.c
fs/nfs/pnfs.h
fs/nfs/pnfs_dev.c
fs/nfs/read.c
fs/nfs/unlink.c
fs/nfs/write.c
fs/omfs/dir.c
fs/open.c
fs/pipe.c
fs/proc/generic.c
fs/proc/proc_net.c
fs/proc/root.c
fs/read_write.c
fs/xfs/linux-2.6/xfs_buf.c
fs/xfs/linux-2.6/xfs_file.c
fs/xfs/linux-2.6/xfs_ioctl.c
fs/xfs/linux-2.6/xfs_iops.c
fs/xfs/xfs_bmap.c
fs/xfs/xfs_da_btree.c
fs/xfs/xfs_dir2.c
fs/xfs/xfs_filestream.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_rename.c
fs/xfs/xfs_vnodeops.c
include/keys/encrypted-type.h
include/linux/ecryptfs.h [new file with mode: 0644]
include/linux/fs.h
include/linux/if.h
include/linux/input.h
include/linux/input/kxtj9.h [new file with mode: 0644]
include/linux/kernel.h
include/linux/netdevice.h
include/linux/nfs4.h
include/linux/nfs_fs_sb.h
include/linux/nfs_page.h
include/linux/nfs_xdr.h
include/linux/pnfs_osd_xdr.h
include/linux/proc_fs.h
include/linux/raid/md_p.h
include/linux/sunrpc/bc_xprt.h
include/linux/sunrpc/sched.h
include/linux/sunrpc/svc.h
include/linux/sunrpc/xprt.h
include/linux/watchdog.h
include/linux/wm97xx.h
include/scsi/iscsi_proto.h
include/sound/pcm.h
include/sound/pcm_params.h
include/sound/soc-dapm.h
kernel/cgroup.c
kernel/compat.c
kernel/signal.c
net/8021q/vlan_dev.c
net/bluetooth/bnep/netdev.c
net/core/dev.c
net/core/pktgen.c
net/ethernet/eth.c
net/ipv4/devinet.c
net/ipv6/addrconf.c
net/l2tp/l2tp_eth.c
net/mac80211/iface.c
net/socket.c
net/sunrpc/Kconfig
net/sunrpc/Makefile
net/sunrpc/backchannel_rqst.c
net/sunrpc/bc_svc.c
net/sunrpc/clnt.c
net/sunrpc/sched.c
net/sunrpc/svc.c
net/sunrpc/svcsock.c
net/sunrpc/xdr.c
net/sunrpc/xprt.c
net/sunrpc/xprtrdma/transport.c
net/sunrpc/xprtrdma/xprt_rdma.h
net/sunrpc/xprtsock.c
net/wireless/reg.c
security/apparmor/domain.c
security/apparmor/lsm.c
security/integrity/ima/ima_main.c
security/keys/Makefile
security/keys/ecryptfs_format.c [new file with mode: 0644]
security/keys/ecryptfs_format.h [new file with mode: 0644]
security/keys/encrypted.c
security/keys/request_key_auth.c
security/tomoyo/Kconfig
security/tomoyo/Makefile
security/tomoyo/audit.c [new file with mode: 0644]
security/tomoyo/common.c
security/tomoyo/common.h
security/tomoyo/condition.c [new file with mode: 0644]
security/tomoyo/domain.c
security/tomoyo/file.c
security/tomoyo/gc.c
security/tomoyo/group.c
security/tomoyo/load_policy.c
security/tomoyo/memory.c
security/tomoyo/mount.c
security/tomoyo/realpath.c
security/tomoyo/securityfs_if.c
security/tomoyo/tomoyo.c
security/tomoyo/util.c
sound/core/pcm_lib.c
sound/isa/msnd/msnd.h
sound/oss/ad1848.c
sound/oss/sb_mixer.c
sound/pci/asihpi/asihpi.c
sound/pci/asihpi/hpioctl.c
sound/pci/hda/Kconfig
sound/pci/hda/hda_codec.c
sound/pci/hda/hda_codec.h
sound/pci/hda/hda_local.h
sound/pci/hda/patch_analog.c
sound/pci/hda/patch_cirrus.c
sound/pci/hda/patch_conexant.c
sound/pci/hda/patch_realtek.c
sound/pci/hda/patch_sigmatel.c
sound/pci/hda/patch_via.c
sound/soc/codecs/sgtl5000.c
sound/soc/codecs/wm8962.c
sound/soc/davinci/davinci-vcif.c
sound/soc/samsung/i2s.c
sound/soc/soc-core.c
sound/soc/soc-dapm.c

diff --git a/Documentation/devicetree/bindings/gpio/gpio_keys.txt b/Documentation/devicetree/bindings/gpio/gpio_keys.txt
new file mode 100644 (file)
index 0000000..7190c99
--- /dev/null
@@ -0,0 +1,36 @@
+Device-Tree bindings for input/gpio_keys.c keyboard driver
+
+Required properties:
+       - compatible = "gpio-keys";
+
+Optional properties:
+       - autorepeat: Boolean, Enable auto repeat feature of Linux input
+         subsystem.
+
+Each button (key) is represented as a sub-node of "gpio-keys":
+Subnode properties:
+
+       - gpios: OF device-tree gpio specification.
+       - label: Descriptive name of the key.
+       - linux,code: Keycode to emit.
+
+Optional subnode-properties:
+       - linux,input-type: Specify event type this button/key generates.
+         If not specified defaults to <1> == EV_KEY.
+       - debounce-interval: Debouncing interval time in milliseconds.
+         If not specified defaults to 5.
+       - gpio-key,wakeup: Boolean, button can wake-up the system.
+
+Example nodes:
+
+       gpio_keys {
+                       compatible = "gpio-keys";
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       autorepeat;
+                       button@21 {
+                               label = "GPIO Key UP";
+                               linux,code = <103>;
+                               gpios = <&gpio1 0 1>;
+                       };
+                       ...
diff --git a/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt b/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt
new file mode 100644 (file)
index 0000000..2144af1
--- /dev/null
@@ -0,0 +1,14 @@
+* Freescale i.MX Watchdog Timer (WDT) Controller
+
+Required properties:
+- compatible : Should be "fsl,<soc>-wdt"
+- reg : Should contain WDT registers location and length
+- interrupts : Should contain WDT interrupt
+
+Examples:
+
+wdt@73f98000 {
+       compatible = "fsl,imx51-wdt", "fsl,imx21-wdt";
+       reg = <0x73f98000 0x4000>;
+       interrupts = <58>;
+};
diff --git a/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt b/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt
new file mode 100644 (file)
index 0000000..79ead82
--- /dev/null
@@ -0,0 +1,11 @@
+* Samsung's Watchdog Timer Controller
+
+The Samsung's Watchdog controller is used for resuming system operation
+after a preset amount of time during which the WDT reset event has not
+occurred.
+
+Required properties:
+- compatible : should be "samsung,s3c2410-wdt"
+- reg : base physical address of the controller and length of memory mapped
+       region.
+- interrupts : interrupt number to the cpu.
index 87019d2..09994c2 100644 (file)
@@ -92,7 +92,14 @@ For a filesystem to be exportable it must:
    1/ provide the filehandle fragment routines described below.
    2/ make sure that d_splice_alias is used rather than d_add
       when ->lookup finds an inode for a given parent and name.
-      Typically the ->lookup routine will end with a:
+
+      If inode is NULL, d_splice_alias(inode, dentry) is equivalent to
+
+               d_add(dentry, inode), NULL
+
+      Similarly, d_splice_alias(ERR_PTR(err), dentry) = ERR_PTR(err)
+
+      Typically the ->lookup routine will simply end with a:
 
                return d_splice_alias(inode, dentry);
        }
index f0eee83..fc94770 100644 (file)
@@ -360,18 +360,20 @@ Each directory contains:
         A file recording the current state of the device in the array
        which can be a comma separated list of
              faulty   - device has been kicked from active use due to
-                         a detected fault
+                         a detected fault or it has unacknowledged bad
+                         blocks
              in_sync  - device is a fully in-sync member of the array
              writemostly - device will only be subject to read
                         requests if there are no other options.
                         This applies only to raid1 arrays.
-             blocked  - device has failed, metadata is "external",
-                        and the failure hasn't been acknowledged yet.
+             blocked  - device has failed, and the failure hasn't been
+                        acknowledged yet by the metadata handler.
                         Writes that would write to this device if
                         it were not faulty are blocked.
              spare    - device is working, but not a full member.
                         This includes spares that are in the process
                         of being recovered to
+             write_error - device has ever seen a write error.
        This list may grow in future.
        This can be written to.
        Writing "faulty"  simulates a failure on the device.
@@ -379,9 +381,11 @@ Each directory contains:
        Writing "writemostly" sets the writemostly flag.
        Writing "-writemostly" clears the writemostly flag.
        Writing "blocked" sets the "blocked" flag.
-       Writing "-blocked" clears the "blocked" flag and allows writes
-               to complete.
+       Writing "-blocked" clears the "blocked" flags and allows writes
+               to complete and possibly simulates an error.
        Writing "in_sync" sets the in_sync flag.
+       Writing "write_error" sets writeerrorseen flag.
+       Writing "-write_error" clears writeerrorseen flag.
 
        This file responds to select/poll. Any change to 'faulty'
        or 'blocked' causes an event.
@@ -419,7 +423,6 @@ Each directory contains:
         written, it will be rejected.
 
       recovery_start
-
         When the device is not 'in_sync', this records the number of
        sectors from the start of the device which are known to be
        correct.  This is normally zero, but during a recovery
@@ -435,6 +438,20 @@ Each directory contains:
        Setting this to 'none' is equivalent to setting 'in_sync'.
        Setting to any other value also clears the 'in_sync' flag.
        
+      bad_blocks
+       This gives the list of all known bad blocks in the form of
+       start address and length (in sectors respectively). If output
+       is too big to fit in a page, it will be truncated. Writing
+       "sector length" to this file adds new acknowledged (i.e.
+       recorded to disk safely) bad blocks.
+
+      unacknowledged_bad_blocks
+       This gives the list of known-but-not-yet-saved-to-disk bad
+       blocks in the same form of 'bad_blocks'. If output is too big
+       to fit in a page, it will be truncated. Writing to this file
+       adds bad blocks without acknowledging them. This is largely
+       for testing.
+
 
 
An active md device will also contain an entry for each active device
diff --git a/Documentation/security/keys-ecryptfs.txt b/Documentation/security/keys-ecryptfs.txt
new file mode 100644 (file)
index 0000000..c3bbeba
--- /dev/null
@@ -0,0 +1,68 @@
+               Encrypted keys for the eCryptfs filesystem
+
+ECryptfs is a stacked filesystem which transparently encrypts and decrypts each
+file using a randomly generated File Encryption Key (FEK).
+
+Each FEK is in turn encrypted with a File Encryption Key Encryption Key (FEFEK)
+either in kernel space or in user space with a daemon called 'ecryptfsd'.  In
+the former case the operation is performed directly by the kernel CryptoAPI
+using a key, the FEFEK, derived from a user prompted passphrase;  in the latter
+the FEK is encrypted by 'ecryptfsd' with the help of external libraries in order
+to support other mechanisms like public key cryptography, PKCS#11 and TPM based
+operations.
+
+The data structure defined by eCryptfs to contain information required for the
+FEK decryption is called authentication token and, currently, can be stored in a
+kernel key of the 'user' type, inserted in the user's session specific keyring
+by the userspace utility 'mount.ecryptfs' shipped with the package
+'ecryptfs-utils'.
+
+The 'encrypted' key type has been extended with the introduction of the new
+format 'ecryptfs' in order to be used in conjunction with the eCryptfs
+filesystem.  Encrypted keys of the newly introduced format store an
+authentication token in its payload with a FEFEK randomly generated by the
+kernel and protected by the parent master key.
+
+In order to avoid known-plaintext attacks, the datablob obtained through
+commands 'keyctl print' or 'keyctl pipe' does not contain the overall
+authentication token, which content is well known, but only the FEFEK in
+encrypted form.
+
+The eCryptfs filesystem may really benefit from using encrypted keys in that the
+required key can be securely generated by an Administrator and provided at boot
+time after the unsealing of a 'trusted' key in order to perform the mount in a
+controlled environment.  Another advantage is that the key is not exposed to
+threats of malicious software, because it is available in clear form only at
+kernel level.
+
+Usage:
+   keyctl add encrypted name "new ecryptfs key-type:master-key-name keylen" ring
+   keyctl add encrypted name "load hex_blob" ring
+   keyctl update keyid "update key-type:master-key-name"
+
+name:= '<16 hexadecimal characters>'
+key-type:= 'trusted' | 'user'
+keylen:= 64
+
+
+Example of encrypted key usage with the eCryptfs filesystem:
+
+Create an encrypted key "1000100010001000" of length 64 bytes with format
+'ecryptfs' and save it using a previously loaded user key "test":
+
+    $ keyctl add encrypted 1000100010001000 "new ecryptfs user:test 64" @u
+    19184530
+
+    $ keyctl print 19184530
+    ecryptfs user:test 64 490045d4bfe48c99f0d465fbbbb79e7500da954178e2de0697
+    dd85091f5450a0511219e9f7cd70dcd498038181466f78ac8d4c19504fcc72402bfc41c2
+    f253a41b7507ccaa4b2b03fff19a69d1cc0b16e71746473f023a95488b6edfd86f7fdd40
+    9d292e4bacded1258880122dd553a661
+
+    $ keyctl pipe 19184530 > ecryptfs.blob
+
+Mount an eCryptfs filesystem using the created encrypted key "1000100010001000"
+into the '/secret' directory:
+
+    $ mount -i -t ecryptfs -oecryptfs_sig=1000100010001000,\
+      ecryptfs_cipher=aes,ecryptfs_key_bytes=32 /secret /secret
index 8fb79bc..5f50cca 100644 (file)
@@ -53,12 +53,19 @@ they are only as secure as the user key encrypting them.  The master user key
 should therefore be loaded in as secure a way as possible, preferably early in
 boot.
 
+The decrypted portion of encrypted keys can contain either a simple symmetric
+key or a more complex structure. The format of the more complex structure is
+application specific, which is identified by 'format'.
+
 Usage:
-  keyctl add encrypted name "new key-type:master-key-name keylen" ring
-  keyctl add encrypted name "load hex_blob" ring
-  keyctl update keyid "update key-type:master-key-name"
+    keyctl add encrypted name "new [format] key-type:master-key-name keylen"
+        ring
+    keyctl add encrypted name "load hex_blob" ring
+    keyctl update keyid "update key-type:master-key-name"
+
+format:= 'default | ecryptfs'
+key-type:= 'trusted' | 'user'
 
-where 'key-type' is either 'trusted' or 'user'.
 
 Examples of trusted and encrypted key usage:
 
@@ -114,15 +121,25 @@ Reseal a trusted key under new pcr values:
     7ef6a24defe4846104209bf0c3eced7fa1a672ed5b125fc9d8cd88b476a658a4434644ef
     df8ae9a178e9f83ba9f08d10fa47e4226b98b0702f06b3b8
 
-Create and save an encrypted key "evm" using the above trusted key "kmk":
+The initial consumer of trusted keys is EVM, which at boot time needs a high
+quality symmetric key for HMAC protection of file metadata.  The use of a
+trusted key provides strong guarantees that the EVM key has not been
+compromised by a user level problem, and when sealed to specific boot PCR
+values, protects against boot and offline attacks.  Create and save an
+encrypted key "evm" using the above trusted key "kmk":
 
+option 1: omitting 'format'
     $ keyctl add encrypted evm "new trusted:kmk 32" @u
     159771175
 
+option 2: explicitly defining 'format' as 'default'
+    $ keyctl add encrypted evm "new default trusted:kmk 32" @u
+    159771175
+
     $ keyctl print 159771175
-    trusted:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b382dbbc55
-    be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e024717c64
-    5972dcb82ab2dde83376d82b2e3c09ffc
+    default trusted:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b3
+    82dbbc55be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e0
+    24717c64 5972dcb82ab2dde83376d82b2e3c09ffc
 
     $ keyctl pipe 159771175 > evm.blob
 
@@ -132,14 +149,11 @@ Load an encrypted key "evm" from saved blob:
     831684262
 
     $ keyctl print 831684262
-    trusted:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b382dbbc55
-    be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e024717c64
-    5972dcb82ab2dde83376d82b2e3c09ffc
-
-
-The initial consumer of trusted keys is EVM, which at boot time needs a high
-quality symmetric key for HMAC protection of file metadata.  The use of a
-trusted key provides strong guarantees that the EVM key has not been
-compromised by a user level problem, and when sealed to specific boot PCR
-values, protects against boot and offline attacks.  Other uses for trusted and
-encrypted keys, such as for disk and file encryption are anticipated.
+    default trusted:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b3
+    82dbbc55be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e0
+    24717c64 5972dcb82ab2dde83376d82b2e3c09ffc
+
+Other uses for trusted and encrypted keys, such as for disk and file encryption
+are anticipated.  In particular the new format 'ecryptfs' has been defined
+in order to use encrypted keys to mount an eCryptfs filesystem.  More details
+about the usage can be found in the file 'Documentation/keys-ecryptfs.txt'.
index ee99451..fc51128 100644 (file)
@@ -8,6 +8,8 @@ src/
        - directory holding watchdog related example programs.
 watchdog-api.txt
        - description of the Linux Watchdog driver API.
+watchdog-kernel-api.txt
+       - description of the Linux WatchDog Timer Driver Core kernel API.
 watchdog-parameters.txt
        - information on driver parameters (for drivers other than
          the ones that have driver-specific files here)
diff --git a/Documentation/watchdog/watchdog-kernel-api.txt b/Documentation/watchdog/watchdog-kernel-api.txt
new file mode 100644 (file)
index 0000000..4f7c894
--- /dev/null
@@ -0,0 +1,162 @@
+The Linux WatchDog Timer Driver Core kernel API.
+===============================================
+Last reviewed: 22-Jul-2011
+
+Wim Van Sebroeck <wim@iguana.be>
+
+Introduction
+------------
+This document does not describe what a WatchDog Timer (WDT) Driver or Device is.
+It also does not describe the API which can be used by user space to communicate
+with a WatchDog Timer. If you want to know this then please read the following
+file: Documentation/watchdog/watchdog-api.txt .
+
+So what does this document describe? It describes the API that can be used by
+WatchDog Timer Drivers that want to use the WatchDog Timer Driver Core
+Framework. This framework provides all interfacing towards user space so that
+the same code does not have to be reproduced each time. This also means that
+a watchdog timer driver then only needs to provide the different routines
+(operations) that control the watchdog timer (WDT).
+
+The API
+-------
+Each watchdog timer driver that wants to use the WatchDog Timer Driver Core
+must #include <linux/watchdog.h> (you would have to do this anyway when
+writing a watchdog device driver). This include file contains following
+register/unregister routines:
+
+extern int watchdog_register_device(struct watchdog_device *);
+extern void watchdog_unregister_device(struct watchdog_device *);
+
+The watchdog_register_device routine registers a watchdog timer device.
+The parameter of this routine is a pointer to a watchdog_device structure.
+This routine returns zero on success and a negative errno code for failure.
+
+The watchdog_unregister_device routine deregisters a registered watchdog timer
+device. The parameter of this routine is the pointer to the registered
+watchdog_device structure.
+
+The watchdog device structure looks like this:
+
+struct watchdog_device {
+       const struct watchdog_info *info;
+       const struct watchdog_ops *ops;
+       unsigned int bootstatus;
+       unsigned int timeout;
+       unsigned int min_timeout;
+       unsigned int max_timeout;
+       void *driver_data;
+       unsigned long status;
+};
+
+It contains following fields:
+* info: a pointer to a watchdog_info structure. This structure gives some
+  additional information about the watchdog timer itself. (Like its unique name)
+* ops: a pointer to the list of watchdog operations that the watchdog supports.
+* timeout: the watchdog timer's timeout value (in seconds).
+* min_timeout: the watchdog timer's minimum timeout value (in seconds).
+* max_timeout: the watchdog timer's maximum timeout value (in seconds).
+* bootstatus: status of the device after booting (reported with watchdog
+  WDIOF_* status bits).
+* driver_data: a pointer to the drivers private data of a watchdog device.
+  This data should only be accessed via the watchdog_set_drvadata and
+  watchdog_get_drvdata routines.
+* status: this field contains a number of status bits that give extra
+  information about the status of the device (Like: is the watchdog timer
+  running/active, is the nowayout bit set, is the device opened via
+  the /dev/watchdog interface or not, ...).
+
+The list of watchdog operations is defined as:
+
+struct watchdog_ops {
+       struct module *owner;
+       /* mandatory operations */
+       int (*start)(struct watchdog_device *);
+       int (*stop)(struct watchdog_device *);
+       /* optional operations */
+       int (*ping)(struct watchdog_device *);
+       unsigned int (*status)(struct watchdog_device *);
+       int (*set_timeout)(struct watchdog_device *, unsigned int);
+       long (*ioctl)(struct watchdog_device *, unsigned int, unsigned long);
+};
+
+It is important that you first define the module owner of the watchdog timer
+driver's operations. This module owner will be used to lock the module when
+the watchdog is active. (This to avoid a system crash when you unload the
+module and /dev/watchdog is still open).
+Some operations are mandatory and some are optional. The mandatory operations
+are:
+* start: this is a pointer to the routine that starts the watchdog timer
+  device.
+  The routine needs a pointer to the watchdog timer device structure as a
+  parameter. It returns zero on success or a negative errno code for failure.
+* stop: with this routine the watchdog timer device is being stopped.
+  The routine needs a pointer to the watchdog timer device structure as a
+  parameter. It returns zero on success or a negative errno code for failure.
+  Some watchdog timer hardware can only be started and not be stopped. The
+  driver supporting this hardware needs to make sure that both a start and a
+  stop routine are provided. This can be done by using a timer in the driver
+  that regularly sends a keepalive ping to the watchdog timer hardware.
+
+Not all watchdog timer hardware supports the same functionality. That's why
+all other routines/operations are optional. They only need to be provided if
+they are supported. These optional routines/operations are:
+* ping: this is the routine that sends a keepalive ping to the watchdog timer
+  hardware.
+  The routine needs a pointer to the watchdog timer device structure as a
+  parameter. It returns zero on success or a negative errno code for failure.
+  Most hardware that does not support this as a separate function uses the
+  start function to restart the watchdog timer hardware. And that's also what
+  the watchdog timer driver core does: to send a keepalive ping to the watchdog
+  timer hardware it will either use the ping operation (when available) or the
+  start operation (when the ping operation is not available).
+  (Note: the WDIOC_KEEPALIVE ioctl call will only be active when the
+  WDIOF_KEEPALIVEPING bit has been set in the option field on the watchdog's
+  info structure).
+* status: this routine checks the status of the watchdog timer device. The
+  status of the device is reported with watchdog WDIOF_* status flags/bits.
+* set_timeout: this routine checks and changes the timeout of the watchdog
+  timer device. It returns 0 on success, -EINVAL for "parameter out of range"
+  and -EIO for "could not write value to the watchdog". On success the timeout
+  value of the watchdog_device will be changed to the value that was just used
+  to re-program the watchdog timer device.
+  (Note: the WDIOF_SETTIMEOUT needs to be set in the options field of the
+  watchdog's info structure).
+* ioctl: if this routine is present then it will be called first before we do
+  our own internal ioctl call handling. This routine should return -ENOIOCTLCMD
+  if a command is not supported. The parameters that are passed to the ioctl
+  call are: watchdog_device, cmd and arg.
+
+The status bits should (preferably) be set with bit-operations such as set_bit
+and clear_bit. The status bits that are defined are:
+* WDOG_ACTIVE: this status bit indicates whether or not a watchdog timer device
+  is active. When the watchdog is active after booting, then you should
+  set this status bit (Note: when you register the watchdog timer device with
+  this bit set, then opening /dev/watchdog will skip the start operation)
+* WDOG_DEV_OPEN: this status bit shows whether or not the watchdog device
+  was opened via /dev/watchdog.
+  (This bit should only be used by the WatchDog Timer Driver Core).
+* WDOG_ALLOW_RELEASE: this bit stores whether or not the magic close character
+  has been sent (so that we can support the magic close feature).
+  (This bit should only be used by the WatchDog Timer Driver Core).
+* WDOG_NO_WAY_OUT: this bit stores the nowayout setting for the watchdog.
+  If this bit is set then the watchdog timer will not be able to stop.
+
+Note: The WatchDog Timer Driver Core supports the magic close feature and
+the nowayout feature. To use the magic close feature you must set the
+WDIOF_MAGICCLOSE bit in the options field of the watchdog's info structure.
+The nowayout feature will overrule the magic close feature.
+
+To get or set driver specific data the following two helper functions should be
+used:
+
+static inline void watchdog_set_drvdata(struct watchdog_device *wdd, void *data)
+static inline void *watchdog_get_drvdata(struct watchdog_device *wdd)
+
+The watchdog_set_drvdata function allows you to add driver specific data. The
+arguments of this function are the watchdog device where you want to add the
+driver specific data to and a pointer to the data itself.
+
+The watchdog_get_drvdata function allows you to retrieve driver specific data.
+The argument of this function is the watchdog device where you want to retrieve
+data from. The function returns the pointer to the driver specific data.
index 7b2e9e8..1d2e79d 100644 (file)
@@ -6408,7 +6408,7 @@ L:        tomoyo-users-en@lists.sourceforge.jp (subscribers-only, for users in English)
 L:     tomoyo-dev@lists.sourceforge.jp (subscribers-only, for developers in Japanese)
 L:     tomoyo-users@lists.sourceforge.jp (subscribers-only, for users in Japanese)
 W:     http://tomoyo.sourceforge.jp/
-T:     quilt http://svn.sourceforge.jp/svnroot/tomoyo/trunk/2.3.x/tomoyo-lsm/patches/
+T:     quilt http://svn.sourceforge.jp/svnroot/tomoyo/trunk/2.4.x/tomoyo-lsm/patches/
 S:     Maintained
 F:     security/tomoyo/
 
index 4b8abf9..d82ebab 100644 (file)
@@ -27,14 +27,14 @@ comment "Tegra board type"
 
 config MACH_HARMONY
        bool "Harmony board"
-       select MACH_HAS_SND_SOC_TEGRA_WM8903
+       select MACH_HAS_SND_SOC_TEGRA_WM8903 if SND_SOC
        help
          Support for nVidia Harmony development platform
 
 config MACH_KAEN
        bool "Kaen board"
        select MACH_SEABOARD
-       select MACH_HAS_SND_SOC_TEGRA_WM8903
+       select MACH_HAS_SND_SOC_TEGRA_WM8903 if SND_SOC
        help
          Support for the Kaen version of Seaboard
 
@@ -45,7 +45,7 @@ config MACH_PAZ00
 
 config MACH_SEABOARD
        bool "Seaboard board"
-       select MACH_HAS_SND_SOC_TEGRA_WM8903
+       select MACH_HAS_SND_SOC_TEGRA_WM8903 if SND_SOC
        help
          Support for nVidia Seaboard development platform. It will
         also be included for some of the derivative boards that
index c550c67..397268c 100644 (file)
@@ -3,4 +3,4 @@
 #
 
 # Common support
-obj-y                          := common.o timer.o board_dt.o
+obj-y                          := common.o timer.o
diff --git a/arch/arm/mach-zynq/board_dt.c b/arch/arm/mach-zynq/board_dt.c
deleted file mode 100644 (file)
index e69de29..0000000
index d8f0133..7d6831a 100644 (file)
@@ -38,6 +38,7 @@ struct cpuinfo {
        u32 use_exc;
        u32 ver_code;
        u32 mmu;
+       u32 mmu_privins;
        u32 endian;
 
        /* CPU caches */
index c4532f0..c9a6262 100644 (file)
@@ -14,7 +14,7 @@
 
 #if CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR
 
-static inline unsigned long arch_local_irq_save(void)
+static inline notrace unsigned long arch_local_irq_save(void)
 {
        unsigned long flags;
        asm volatile("  msrclr %0, %1   \n"
@@ -25,7 +25,7 @@ static inline unsigned long arch_local_irq_save(void)
        return flags;
 }
 
-static inline void arch_local_irq_disable(void)
+static inline notrace void arch_local_irq_disable(void)
 {
        /* this uses r0 without declaring it - is that correct? */
        asm volatile("  msrclr r0, %0   \n"
@@ -35,7 +35,7 @@ static inline void arch_local_irq_disable(void)
                     : "memory");
 }
 
-static inline void arch_local_irq_enable(void)
+static inline notrace void arch_local_irq_enable(void)
 {
        /* this uses r0 without declaring it - is that correct? */
        asm volatile("  msrset  r0, %0  \n"
@@ -47,7 +47,7 @@ static inline void arch_local_irq_enable(void)
 
 #else /* !CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR */
 
-static inline unsigned long arch_local_irq_save(void)
+static inline notrace unsigned long arch_local_irq_save(void)
 {
        unsigned long flags, tmp;
        asm volatile (" mfs     %0, rmsr        \n"
@@ -61,7 +61,7 @@ static inline unsigned long arch_local_irq_save(void)
        return flags;
 }
 
-static inline void arch_local_irq_disable(void)
+static inline notrace void arch_local_irq_disable(void)
 {
        unsigned long tmp;
        asm volatile("  mfs     %0, rmsr        \n"
@@ -74,7 +74,7 @@ static inline void arch_local_irq_disable(void)
                     : "memory");
 }
 
-static inline void arch_local_irq_enable(void)
+static inline notrace void arch_local_irq_enable(void)
 {
        unsigned long tmp;
        asm volatile("  mfs     %0, rmsr        \n"
@@ -89,7 +89,7 @@ static inline void arch_local_irq_enable(void)
 
 #endif /* CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR */
 
-static inline unsigned long arch_local_save_flags(void)
+static inline notrace unsigned long arch_local_save_flags(void)
 {
        unsigned long flags;
        asm volatile("  mfs     %0, rmsr        \n"
@@ -100,7 +100,7 @@ static inline unsigned long arch_local_save_flags(void)
        return flags;
 }
 
-static inline void arch_local_irq_restore(unsigned long flags)
+static inline notrace void arch_local_irq_restore(unsigned long flags)
 {
        asm volatile("  mts     rmsr, %0        \n"
                     "  nop                     \n"
@@ -109,12 +109,12 @@ static inline void arch_local_irq_restore(unsigned long flags)
                     : "memory");
 }
 
-static inline bool arch_irqs_disabled_flags(unsigned long flags)
+static inline notrace bool arch_irqs_disabled_flags(unsigned long flags)
 {
        return (flags & MSR_IE) == 0;
 }
 
-static inline bool arch_irqs_disabled(void)
+static inline notrace bool arch_irqs_disabled(void)
 {
        return arch_irqs_disabled_flags(arch_local_save_flags());
 }
index aed2a6b..7283bfb 100644 (file)
@@ -125,9 +125,6 @@ struct thread_struct {
        .pgdir = swapper_pg_dir, \
 }
 
-/* Do necessary setup to start up a newly executed thread.  */
-void start_thread(struct pt_regs *regs,
-               unsigned long pc, unsigned long usp);
 
 /* Free all resources held by a thread. */
 extern inline void release_thread(struct task_struct *dead_task)
index 9ad567e..20c5e8e 100644 (file)
 #define HAVE_ARCH_DEVTREE_FIXUPS
 
 /* Other Prototypes */
-extern int early_uartlite_console(void);
-extern int early_uart16550_console(void);
+enum early_consoles {
+       UARTLITE = 1,
+       UART16550 = 2,
+};
+
+extern int of_early_console(void *version);
 
 /*
  * OF address retreival & translation
index a10bec6..4bbdb4c 100644 (file)
@@ -111,16 +111,16 @@ struct pvr_s {
 /* Target family PVR mask */
 #define PVR10_TARGET_FAMILY_MASK       0xFF000000
 
-/* MMU descrtiption */
+/* MMU description */
 #define PVR11_USE_MMU                  0xC0000000
 #define PVR11_MMU_ITLB_SIZE            0x38000000
 #define PVR11_MMU_DTLB_SIZE            0x07000000
 #define PVR11_MMU_TLB_ACCESS           0x00C00000
 #define PVR11_MMU_ZONES                        0x003C0000
+#define PVR11_MMU_PRIVINS              0x00010000
 /* MSR Reset value PVR mask */
 #define PVR11_MSR_RESET_VALUE_MASK     0x000007FF
 
-
 /* PVR access macros */
 #define PVR_IS_FULL(_pvr)      (_pvr.pvr[0] & PVR0_PVR_FULL_MASK)
 #define PVR_USE_BARREL(_pvr)   (_pvr.pvr[0] & PVR0_USE_BARREL_MASK)
@@ -216,6 +216,7 @@ struct pvr_s {
 #define PVR_MMU_DTLB_SIZE(_pvr)                (_pvr.pvr[11] & PVR11_MMU_DTLB_SIZE)
 #define PVR_MMU_TLB_ACCESS(_pvr)       (_pvr.pvr[11] & PVR11_MMU_TLB_ACCESS)
 #define PVR_MMU_ZONES(_pvr)            (_pvr.pvr[11] & PVR11_MMU_ZONES)
+#define PVR_MMU_PRIVINS(pvr)           (pvr.pvr[11] & PVR11_MMU_PRIVINS)
 
 /* endian */
 #define PVR_ENDIAN(_pvr)       (_pvr.pvr[0] & PVR0_ENDI)
index 8f39689..904e5ef 100644 (file)
@@ -23,6 +23,7 @@ extern char cmd_line[COMMAND_LINE_SIZE];
 void early_printk(const char *fmt, ...);
 
 int setup_early_printk(char *opt);
+void remap_early_printk(void);
 void disable_early_printk(void);
 
 #if defined(CONFIG_EARLY_PRINTK)
index f70a604..916aaed 100644 (file)
@@ -72,6 +72,7 @@ void set_cpuinfo_pvr_full(struct cpuinfo *ci, struct device_node *cpu)
        CI(pvr_user2, USER2);
 
        CI(mmu, USE_MMU);
+       CI(mmu_privins, MMU_PRIVINS);
        CI(endian, ENDIAN);
 
        CI(use_icache, USE_ICACHE);
index b16b994..592bb2e 100644 (file)
@@ -119,6 +119,7 @@ void __init set_cpuinfo_static(struct cpuinfo *ci, struct device_node *cpu)
        ci->pvr_user2 = fcpu(cpu, "xlnx,pvr-user2");
 
        ci->mmu = fcpu(cpu, "xlnx,use-mmu");
+       ci->mmu_privins = fcpu(cpu, "xlnx,mmu-privileged-instr");
        ci->endian = fcpu(cpu, "xlnx,endianness");
 
        ci->ver_code = 0;
index c1640c5..44394d8 100644 (file)
@@ -88,4 +88,8 @@ void __init setup_cpuinfo(void)
                printk(KERN_WARNING "%s: Unsupported PVR setting\n", __func__);
                set_cpuinfo_static(&cpuinfo, cpu);
        }
+
+       if (cpuinfo.mmu_privins)
+               printk(KERN_WARNING "%s: Stream instructions enabled"
+                       " - USERSPACE CAN LOCK THIS KERNEL!\n", __func__);
 }
index b4048af..7b5dca7 100644 (file)
@@ -97,6 +97,10 @@ static int show_cpuinfo(struct seq_file *m, void *v)
                (cpuinfo.use_exc & PVR2_FPU_EXC_MASK) ? "fpu " : "",
                (cpuinfo.use_exc & PVR2_USE_FSL_EXC) ? "fsl " : "");
 
+       count += seq_printf(m,
+                       "Stream-insns:\t%sprivileged\n",
+                       cpuinfo.mmu_privins ? "un" : "");
+
        if (cpuinfo.use_icache)
                count += seq_printf(m,
                                "Icache:\t\t%ukB\tline length:\t%dB\n",
@@ -110,10 +114,11 @@ static int show_cpuinfo(struct seq_file *m, void *v)
                                "Dcache:\t\t%ukB\tline length:\t%dB\n",
                                cpuinfo.dcache_size >> 10,
                                cpuinfo.dcache_line_length);
+               seq_printf(m, "Dcache-Policy:\t");
                if (cpuinfo.dcache_wb)
-                       count += seq_printf(m, "\t\twrite-back\n");
+                       count += seq_printf(m, "write-back\n");
                else
-                       count += seq_printf(m, "\t\twrite-through\n");
+                       count += seq_printf(m, "write-through\n");
        } else
                count += seq_printf(m, "Dcache:\t\tno\n");
 
index c3616a0..d26d92d 100644 (file)
@@ -35,7 +35,7 @@ static void early_printk_uartlite_putc(char c)
         * we'll never timeout on a working UART.
         */
 
-       unsigned retries = 10000;
+       unsigned retries = 1000000;
        /* read status bit - 0x8 offset */
        while (--retries && (in_be32(base_addr + 8) & (1 << 3)))
                ;
@@ -60,7 +60,7 @@ static void early_printk_uartlite_write(struct console *unused,
 static struct console early_serial_uartlite_console = {
        .name = "earlyser",
        .write = early_printk_uartlite_write,
-       .flags = CON_PRINTBUFFER,
+       .flags = CON_PRINTBUFFER | CON_BOOT,
        .index = -1,
 };
 #endif /* CONFIG_SERIAL_UARTLITE_CONSOLE */
@@ -104,7 +104,7 @@ static void early_printk_uart16550_write(struct console *unused,
 static struct console early_serial_uart16550_console = {
        .name = "earlyser",
        .write = early_printk_uart16550_write,
-       .flags = CON_PRINTBUFFER,
+       .flags = CON_PRINTBUFFER | CON_BOOT,
        .index = -1,
 };
 #endif /* CONFIG_SERIAL_8250_CONSOLE */
@@ -127,48 +127,56 @@ void early_printk(const char *fmt, ...)
 
 int __init setup_early_printk(char *opt)
 {
+       int version = 0;
+
        if (early_console_initialized)
                return 1;
 
-#ifdef CONFIG_SERIAL_UARTLITE_CONSOLE
-       base_addr = early_uartlite_console();
+       base_addr = of_early_console(&version);
        if (base_addr) {
-               early_console_initialized = 1;
 #ifdef CONFIG_MMU
                early_console_reg_tlb_alloc(base_addr);
 #endif
-               early_console = &early_serial_uartlite_console;
-               early_printk("early_printk_console is enabled at 0x%08x\n",
-                                                       base_addr);
-
-               /* register_console(early_console); */
-
-               return 0;
-       }
-#endif /* CONFIG_SERIAL_UARTLITE_CONSOLE */
-
+               switch (version) {
+#ifdef CONFIG_SERIAL_UARTLITE_CONSOLE
+               case UARTLITE:
+                       printk(KERN_INFO "Early console on uartlite "
+                                               "at 0x%08x\n", base_addr);
+                       early_console = &early_serial_uartlite_console;
+                       break;
+#endif
 #ifdef CONFIG_SERIAL_8250_CONSOLE
-       base_addr = early_uart16550_console();
-       base_addr &= ~3; /* clear register offset */
-       if (base_addr) {
-               early_console_initialized = 1;
-#ifdef CONFIG_MMU
-               early_console_reg_tlb_alloc(base_addr);
+               case UART16550:
+                       printk(KERN_INFO "Early console on uart16650 "
+                                               "at 0x%08x\n", base_addr);
+                       early_console = &early_serial_uart16550_console;
+                       break;
 #endif
-               early_console = &early_serial_uart16550_console;
-
-               early_printk("early_printk_console is enabled at 0x%08x\n",
-                                                       base_addr);
-
-               /* register_console(early_console); */
+               default:
+                       printk(KERN_INFO  "Unsupported early console %d\n",
+                                                               version);
+                       return 1;
+               }
 
+               register_console(early_console);
+               early_console_initialized = 1;
                return 0;
        }
-#endif /* CONFIG_SERIAL_8250_CONSOLE */
-
        return 1;
 }
 
+/* Remap early console to virtual address and do not allocate one TLB
+ * only for early console because of performance degression */
+void __init remap_early_printk(void)
+{
+       if (!early_console_initialized || !early_console)
+               return;
+       printk(KERN_INFO "early_printk_console remaping from 0x%x to ",
+                                                               base_addr);
+       base_addr = (u32) ioremap(base_addr, PAGE_SIZE);
+       printk(KERN_CONT "0x%x\n", base_addr);
+}
+
 void __init disable_early_printk(void)
 {
        if (!early_console_initialized || !early_console)
index 56572e9..e62be83 100644 (file)
@@ -1113,23 +1113,23 @@ lw_r10_vm:      R3_TO_LWREG_VM_V        (10);
 lw_r11_vm:     R3_TO_LWREG_VM_V        (11);
 lw_r12_vm:     R3_TO_LWREG_VM_V        (12);
 lw_r13_vm:     R3_TO_LWREG_VM_V        (13);
-lw_r14_vm:     R3_TO_LWREG_VM          (14);
+lw_r14_vm:     R3_TO_LWREG_VM_V        (14);
 lw_r15_vm:     R3_TO_LWREG_VM_V        (15);
-lw_r16_vm:     R3_TO_LWREG_VM          (16);
+lw_r16_vm:     R3_TO_LWREG_VM_V        (16);
 lw_r17_vm:     R3_TO_LWREG_VM_V        (17);
 lw_r18_vm:     R3_TO_LWREG_VM_V        (18);
-lw_r19_vm:     R3_TO_LWREG_VM          (19);
-lw_r20_vm:     R3_TO_LWREG_VM          (20);
-lw_r21_vm:     R3_TO_LWREG_VM          (21);
-lw_r22_vm:     R3_TO_LWREG_VM          (22);
-lw_r23_vm:     R3_TO_LWREG_VM          (23);
-lw_r24_vm:     R3_TO_LWREG_VM          (24);
-lw_r25_vm:     R3_TO_LWREG_VM          (25);
-lw_r26_vm:     R3_TO_LWREG_VM          (26);
-lw_r27_vm:     R3_TO_LWREG_VM          (27);
-lw_r28_vm:     R3_TO_LWREG_VM          (28);
-lw_r29_vm:     R3_TO_LWREG_VM          (29);
-lw_r30_vm:     R3_TO_LWREG_VM          (30);
+lw_r19_vm:     R3_TO_LWREG_VM_V        (19);
+lw_r20_vm:     R3_TO_LWREG_VM_V        (20);
+lw_r21_vm:     R3_TO_LWREG_VM_V        (21);
+lw_r22_vm:     R3_TO_LWREG_VM_V        (22);
+lw_r23_vm:     R3_TO_LWREG_VM_V        (23);
+lw_r24_vm:     R3_TO_LWREG_VM_V        (24);
+lw_r25_vm:     R3_TO_LWREG_VM_V        (25);
+lw_r26_vm:     R3_TO_LWREG_VM_V        (26);
+lw_r27_vm:     R3_TO_LWREG_VM_V        (27);
+lw_r28_vm:     R3_TO_LWREG_VM_V        (28);
+lw_r29_vm:     R3_TO_LWREG_VM_V        (29);
+lw_r30_vm:     R3_TO_LWREG_VM_V        (30);
 lw_r31_vm:     R3_TO_LWREG_VM_V        (31);
 
 sw_table_vm:
@@ -1147,23 +1147,23 @@ sw_r10_vm:      SWREG_TO_R3_VM_V        (10);
 sw_r11_vm:     SWREG_TO_R3_VM_V        (11);
 sw_r12_vm:     SWREG_TO_R3_VM_V        (12);
 sw_r13_vm:     SWREG_TO_R3_VM_V        (13);
-sw_r14_vm:     SWREG_TO_R3_VM          (14);
+sw_r14_vm:     SWREG_TO_R3_VM_V        (14);
 sw_r15_vm:     SWREG_TO_R3_VM_V        (15);
-sw_r16_vm:     SWREG_TO_R3_VM          (16);
+sw_r16_vm:     SWREG_TO_R3_VM_V        (16);
 sw_r17_vm:     SWREG_TO_R3_VM_V        (17);
 sw_r18_vm:     SWREG_TO_R3_VM_V        (18);
-sw_r19_vm:     SWREG_TO_R3_VM          (19);
-sw_r20_vm:     SWREG_TO_R3_VM          (20);
-sw_r21_vm:     SWREG_TO_R3_VM          (21);
-sw_r22_vm:     SWREG_TO_R3_VM          (22);
-sw_r23_vm:     SWREG_TO_R3_VM          (23);
-sw_r24_vm:     SWREG_TO_R3_VM          (24);
-sw_r25_vm:     SWREG_TO_R3_VM          (25);
-sw_r26_vm:     SWREG_TO_R3_VM          (26);
-sw_r27_vm:     SWREG_TO_R3_VM          (27);
-sw_r28_vm:     SWREG_TO_R3_VM          (28);
-sw_r29_vm:     SWREG_TO_R3_VM          (29);
-sw_r30_vm:     SWREG_TO_R3_VM          (30);
+sw_r19_vm:     SWREG_TO_R3_VM_V        (19);
+sw_r20_vm:     SWREG_TO_R3_VM_V        (20);
+sw_r21_vm:     SWREG_TO_R3_VM_V        (21);
+sw_r22_vm:     SWREG_TO_R3_VM_V        (22);
+sw_r23_vm:     SWREG_TO_R3_VM_V        (23);
+sw_r24_vm:     SWREG_TO_R3_VM_V        (24);
+sw_r25_vm:     SWREG_TO_R3_VM_V        (25);
+sw_r26_vm:     SWREG_TO_R3_VM_V        (26);
+sw_r27_vm:     SWREG_TO_R3_VM_V        (27);
+sw_r28_vm:     SWREG_TO_R3_VM_V        (28);
+sw_r29_vm:     SWREG_TO_R3_VM_V        (29);
+sw_r30_vm:     SWREG_TO_R3_VM_V        (30);
 sw_r31_vm:     SWREG_TO_R3_VM_V        (31);
 #endif /* CONFIG_MMU */
 
index c88f066..eb41441 100644 (file)
@@ -134,7 +134,7 @@ void __init init_IRQ(void)
        intr_type =
                be32_to_cpup(of_get_property(intc,
                                                "xlnx,kind-of-intr", NULL));
-       if (intr_type >= (1 << (nr_irq + 1)))
+       if (intr_type > (u32)((1ULL << nr_irq) - 1))
                printk(KERN_INFO " ERROR: Mismatch in kind-of-intr param\n");
 
 #ifdef CONFIG_SELFMOD_INTC
index 968648a..dbb8124 100644 (file)
@@ -237,7 +237,6 @@ unsigned long get_wchan(struct task_struct *p)
 /* Set up a thread for executing a new program */
 void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long usp)
 {
-       set_fs(USER_DS);
        regs->pc = pc;
        regs->r1 = usp;
        regs->pt_mode = 0;
index b15cc21..977484a 100644 (file)
@@ -53,69 +53,58 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
 }
 
 #ifdef CONFIG_EARLY_PRINTK
-/* MS this is Microblaze specifig function */
-static int __init early_init_dt_scan_serial(unsigned long node,
-                               const char *uname, int depth, void *data)
-{
-       unsigned long l;
-       char *p;
-       const __be32 *addr;
-
-       pr_debug("search \"serial\", depth: %d, uname: %s\n", depth, uname);
-
-/* find all serial nodes */
-       if (strncmp(uname, "serial", 6) != 0)
-               return 0;
-
-/* find compatible node with uartlite */
-       p = of_get_flat_dt_prop(node, "compatible", &l);
-       if ((strncmp(p, "xlnx,xps-uartlite", 17) != 0) &&
-                       (strncmp(p, "xlnx,opb-uartlite", 17) != 0) &&
-                       (strncmp(p, "xlnx,axi-uartlite", 17) != 0))
-               return 0;
-
-       addr = of_get_flat_dt_prop(node, "reg", &l);
-       return be32_to_cpup(addr); /* return address */
-}
+char *stdout;
 
-/* this function is looking for early uartlite console - Microblaze specific */
-int __init early_uartlite_console(void)
-{
-       return of_scan_flat_dt(early_init_dt_scan_serial, NULL);
-}
-
-/* MS this is Microblaze specifig function */
-static int __init early_init_dt_scan_serial_full(unsigned long node,
+int __init early_init_dt_scan_chosen_serial(unsigned long node,
                                const char *uname, int depth, void *data)
 {
        unsigned long l;
        char *p;
-       unsigned int addr;
-
-       pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname);
-
-/* find all serial nodes */
-       if (strncmp(uname, "serial", 6) != 0)
-               return 0;
 
-       early_init_dt_check_for_initrd(node);
-
-/* find compatible node with uartlite */
-       p = of_get_flat_dt_prop(node, "compatible", &l);
-
-       if ((strncmp(p, "xlnx,xps-uart16550", 18) != 0) &&
-               (strncmp(p, "xlnx,axi-uart16550", 18) != 0))
-               return 0;
-
-       addr = *(u32 *)of_get_flat_dt_prop(node, "reg", &l);
-       addr += *(u32 *)of_get_flat_dt_prop(node, "reg-offset", &l);
-       return be32_to_cpu(addr); /* return address */
+       pr_debug("%s: depth: %d, uname: %s\n", __func__, depth, uname);
+
+       if (depth == 1 && (strcmp(uname, "chosen") == 0 ||
+                               strcmp(uname, "chosen@0") == 0)) {
+               p = of_get_flat_dt_prop(node, "linux,stdout-path", &l);
+               if (p != NULL && l > 0)
+                       stdout = p; /* store pointer to stdout-path */
+       }
+
+       if (stdout && strstr(stdout, uname)) {
+               p = of_get_flat_dt_prop(node, "compatible", &l);
+               pr_debug("Compatible string: %s\n", p);
+
+               if ((strncmp(p, "xlnx,xps-uart16550", 18) == 0) ||
+                       (strncmp(p, "xlnx,axi-uart16550", 18) == 0)) {
+                       unsigned int addr;
+
+                       *(u32 *)data = UART16550;
+
+                       addr = *(u32 *)of_get_flat_dt_prop(node, "reg", &l);
+                       addr += *(u32 *)of_get_flat_dt_prop(node,
+                                                       "reg-offset", &l);
+                       /* clear register offset */
+                       return be32_to_cpu(addr) & ~3;
+               }
+               if ((strncmp(p, "xlnx,xps-uartlite", 17) == 0) ||
+                               (strncmp(p, "xlnx,opb-uartlite", 17) == 0) ||
+                               (strncmp(p, "xlnx,axi-uartlite", 17) == 0) ||
+                               (strncmp(p, "xlnx,mdm", 8) == 0)) {
+                       unsigned int *addrp;
+
+                       *(u32 *)data = UARTLITE;
+
+                       addrp = of_get_flat_dt_prop(node, "reg", &l);
+                       return be32_to_cpup(addrp); /* return address */
+               }
+       }
+       return 0;
 }
 
-/* this function is looking for early uartlite console - Microblaze specific */
-int __init early_uart16550_console(void)
+/* this function is looking for early console - Microblaze specific */
+int __init of_early_console(void *version)
 {
-       return of_scan_flat_dt(early_init_dt_scan_serial_full, NULL);
+       return of_scan_flat_dt(early_init_dt_scan_chosen_serial, version);
 }
 #endif
 
index 8e2c09b..0e654a1 100644 (file)
@@ -59,6 +59,11 @@ void __init setup_arch(char **cmdline_p)
 
        setup_memory();
 
+#ifdef CONFIG_EARLY_PRINTK
+       /* remap early console to virtual address */
+       remap_early_printk();
+#endif
+
        xilinx_pci_init();
 
 #if defined(CONFIG_SELFMOD_INTC) || defined(CONFIG_SELFMOD_TIMER)
index cfa9cd2..64f7a00 100644 (file)
@@ -177,9 +177,11 @@ static inline unsigned int sparc64_elf_hwcap(void)
                cap |= HWCAP_SPARC_ULTRA3;
        else if (tlb_type == hypervisor) {
                if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 ||
-                   sun4v_chip_type == SUN4V_CHIP_NIAGARA2)
+                   sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
+                   sun4v_chip_type == SUN4V_CHIP_NIAGARA3)
                        cap |= HWCAP_SPARC_BLKINIT;
-               if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2)
+               if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
+                   sun4v_chip_type == SUN4V_CHIP_NIAGARA3)
                        cap |= HWCAP_SPARC_N2;
        }
 
index 7568640..7a5f80d 100644 (file)
@@ -2950,6 +2950,7 @@ extern unsigned long sun4v_ncs_request(unsigned long request,
 #define HV_GRP_N2_CPU                  0x0202
 #define HV_GRP_NIU                     0x0204
 #define HV_GRP_VF_CPU                  0x0205
+#define HV_GRP_KT_CPU                  0x0209
 #define HV_GRP_DIAG                    0x0300
 
 #ifndef __ASSEMBLY__
index f0d0c40..55a17c6 100644 (file)
@@ -42,6 +42,7 @@
 #define SUN4V_CHIP_INVALID     0x00
 #define SUN4V_CHIP_NIAGARA1    0x01
 #define SUN4V_CHIP_NIAGARA2    0x02
+#define SUN4V_CHIP_NIAGARA3    0x03
 #define SUN4V_CHIP_UNKNOWN     0xff
 
 #ifndef __ASSEMBLY__
index bee4bf4..9ed6ff6 100644 (file)
@@ -65,6 +65,7 @@ static struct xor_block_template xor_block_niagara = {
 #define XOR_SELECT_TEMPLATE(FASTEST) \
        ((tlb_type == hypervisor && \
          (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || \
-          sun4v_chip_type == SUN4V_CHIP_NIAGARA2)) ? \
+          sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || \
+          sun4v_chip_type == SUN4V_CHIP_NIAGARA3)) ? \
         &xor_block_niagara : \
         &xor_block_VIS)
index 138dbbc..17cf290 100644 (file)
@@ -474,11 +474,18 @@ static void __init sun4v_cpu_probe(void)
                sparc_pmu_type = "niagara2";
                break;
 
+       case SUN4V_CHIP_NIAGARA3:
+               sparc_cpu_type = "UltraSparc T3 (Niagara3)";
+               sparc_fpu_type = "UltraSparc T3 integrated FPU";
+               sparc_pmu_type = "niagara3";
+               break;
+
        default:
                printk(KERN_WARNING "CPU: Unknown sun4v cpu type [%s]\n",
                       prom_cpu_compatible);
                sparc_cpu_type = "Unknown SUN4V CPU";
                sparc_fpu_type = "Unknown SUN4V FPU";
+               sparc_pmu_type = "Unknown SUN4V PMU";
                break;
        }
 }
index d91fd78..4197e8d 100644 (file)
@@ -324,6 +324,7 @@ static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index)
        switch (sun4v_chip_type) {
        case SUN4V_CHIP_NIAGARA1:
        case SUN4V_CHIP_NIAGARA2:
+       case SUN4V_CHIP_NIAGARA3:
                rover_inc_table = niagara_iterate_method;
                break;
        default:
index aa594c7..c752603 100644 (file)
@@ -132,6 +132,8 @@ prom_sun4v_name:
        .asciz  "sun4v"
 prom_niagara_prefix:
        .asciz  "SUNW,UltraSPARC-T"
+prom_sparc_prefix:
+       .asciz  "SPARC-T"
        .align  4
 prom_root_compatible:
        .skip   64
@@ -382,6 +384,22 @@ sun4v_chip_type:
 90:    ldub    [%g7], %g2
        ldub    [%g1], %g4
        cmp     %g2, %g4
+       bne,pn  %icc, 89f
+        add    %g7, 1, %g7
+       subcc   %g3, 1, %g3
+       bne,pt  %xcc, 90b
+        add    %g1, 1, %g1
+       ba,pt   %xcc, 91f
+        nop
+
+89:    sethi   %hi(prom_cpu_compatible), %g1
+       or      %g1, %lo(prom_cpu_compatible), %g1
+       sethi   %hi(prom_sparc_prefix), %g7
+       or      %g7, %lo(prom_sparc_prefix), %g7
+       mov     7, %g3
+90:    ldub    [%g7], %g2
+       ldub    [%g1], %g4
+       cmp     %g2, %g4
        bne,pn  %icc, 4f
         add    %g7, 1, %g7
        subcc   %g3, 1, %g3
@@ -390,6 +408,15 @@ sun4v_chip_type:
 
        sethi   %hi(prom_cpu_compatible), %g1
        or      %g1, %lo(prom_cpu_compatible), %g1
+       ldub    [%g1 + 7], %g2
+       cmp     %g2, '3'
+       be,pt   %xcc, 5f
+        mov    SUN4V_CHIP_NIAGARA3, %g4
+       ba,pt   %xcc, 4f
+        nop
+
+91:    sethi   %hi(prom_cpu_compatible), %g1
+       or      %g1, %lo(prom_cpu_compatible), %g1
        ldub    [%g1 + 17], %g2
        cmp     %g2, '1'
        be,pt   %xcc, 5f
@@ -397,6 +424,7 @@ sun4v_chip_type:
        cmp     %g2, '2'
        be,pt   %xcc, 5f
         mov    SUN4V_CHIP_NIAGARA2, %g4
+       
 4:
        mov     SUN4V_CHIP_UNKNOWN, %g4
 5:     sethi   %hi(sun4v_chip_type), %g2
@@ -514,6 +542,9 @@ niagara_tlb_fixup:
         cmp    %g1, SUN4V_CHIP_NIAGARA2
        be,pt   %xcc, niagara2_patch
         nop
+       cmp     %g1, SUN4V_CHIP_NIAGARA3
+       be,pt   %xcc, niagara2_patch
+        nop
 
        call    generic_patch_copyops
         nop
index 7c60afb..d306e64 100644 (file)
@@ -38,6 +38,7 @@ static struct api_info api_table[] = {
        { .group = HV_GRP_N2_CPU,                               },
        { .group = HV_GRP_NIU,                                  },
        { .group = HV_GRP_VF_CPU,                               },
+       { .group = HV_GRP_KT_CPU,                               },
        { .group = HV_GRP_DIAG,         .flags = FLAG_PRE_API   },
 };
 
index 8ac23e6..343b0f9 100644 (file)
@@ -80,8 +80,11 @@ static void n2_pcr_write(u64 val)
 {
        unsigned long ret;
 
-       ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val);
-       if (ret != HV_EOK)
+       if (val & PCR_N2_HTRACE) {
+               ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val);
+               if (ret != HV_EOK)
+                       write_pcr(val);
+       } else
                write_pcr(val);
 }
 
@@ -106,6 +109,10 @@ static int __init register_perf_hsvc(void)
                        perf_hsvc_group = HV_GRP_N2_CPU;
                        break;
 
+               case SUN4V_CHIP_NIAGARA3:
+                       perf_hsvc_group = HV_GRP_KT_CPU;
+                       break;
+
                default:
                        return -ENODEV;
                }
index 171e8d8..614da62 100644 (file)
@@ -1343,7 +1343,8 @@ static bool __init supported_pmu(void)
                sparc_pmu = &niagara1_pmu;
                return true;
        }
-       if (!strcmp(sparc_pmu_type, "niagara2")) {
+       if (!strcmp(sparc_pmu_type, "niagara2") ||
+           !strcmp(sparc_pmu_type, "niagara3")) {
                sparc_pmu = &niagara2_pmu;
                return true;
        }
index 1a371f8..8600eb2 100644 (file)
@@ -55,7 +55,7 @@ int atomic_cmpxchg(atomic_t *v, int old, int new)
 }
 EXPORT_SYMBOL(atomic_cmpxchg);
 
-int atomic_add_unless(atomic_t *v, int a, int u)
+int __atomic_add_unless(atomic_t *v, int a, int u)
 {
        int ret;
        unsigned long flags;
@@ -67,7 +67,7 @@ int atomic_add_unless(atomic_t *v, int a, int u)
        spin_unlock_irqrestore(ATOMIC_HASH(v), flags);
        return ret != u;
 }
-EXPORT_SYMBOL(atomic_add_unless);
+EXPORT_SYMBOL(__atomic_add_unless);
 
 /* Atomic operations are already serializing */
 void atomic_set(atomic_t *v, int i)
index ac6739e..c3de70d 100644 (file)
@@ -1,6 +1,6 @@
 /* n2-drv.c: Niagara-2 RNG driver.
  *
- * Copyright (C) 2008 David S. Miller <davem@davemloft.net>
+ * Copyright (C) 2008, 2011 David S. Miller <davem@davemloft.net>
  */
 
 #include <linux/kernel.h>
@@ -22,8 +22,8 @@
 
 #define DRV_MODULE_NAME                "n2rng"
 #define PFX DRV_MODULE_NAME    ": "
-#define DRV_MODULE_VERSION     "0.1"
-#define DRV_MODULE_RELDATE     "May 15, 2008"
+#define DRV_MODULE_VERSION     "0.2"
+#define DRV_MODULE_RELDATE     "July 27, 2011"
 
 static char version[] __devinitdata =
        DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
@@ -623,14 +623,14 @@ static const struct of_device_id n2rng_match[];
 static int __devinit n2rng_probe(struct platform_device *op)
 {
        const struct of_device_id *match;
-       int victoria_falls;
+       int multi_capable;
        int err = -ENOMEM;
        struct n2rng *np;
 
        match = of_match_device(n2rng_match, &op->dev);
        if (!match)
                return -EINVAL;
-       victoria_falls = (match->data != NULL);
+       multi_capable = (match->data != NULL);
 
        n2rng_driver_version();
        np = kzalloc(sizeof(*np), GFP_KERNEL);
@@ -640,8 +640,8 @@ static int __devinit n2rng_probe(struct platform_device *op)
 
        INIT_DELAYED_WORK(&np->work, n2rng_work);
 
-       if (victoria_falls)
-               np->flags |= N2RNG_FLAG_VF;
+       if (multi_capable)
+               np->flags |= N2RNG_FLAG_MULTI;
 
        err = -ENODEV;
        np->hvapi_major = 2;
@@ -658,10 +658,10 @@ static int __devinit n2rng_probe(struct platform_device *op)
                }
        }
 
-       if (np->flags & N2RNG_FLAG_VF) {
+       if (np->flags & N2RNG_FLAG_MULTI) {
                if (np->hvapi_major < 2) {
-                       dev_err(&op->dev, "VF RNG requires HVAPI major "
-                               "version 2 or later, got %lu\n",
+                       dev_err(&op->dev, "multi-unit-capable RNG requires "
+                               "HVAPI major version 2 or later, got %lu\n",
                                np->hvapi_major);
                        goto out_hvapi_unregister;
                }
@@ -688,8 +688,8 @@ static int __devinit n2rng_probe(struct platform_device *op)
                goto out_free_units;
 
        dev_info(&op->dev, "Found %s RNG, units: %d\n",
-                ((np->flags & N2RNG_FLAG_VF) ?
-                 "Victoria Falls" : "Niagara2"),
+                ((np->flags & N2RNG_FLAG_MULTI) ?
+                 "multi-unit-capable" : "single-unit"),
                 np->num_units);
 
        np->hwrng.name = "n2rng";
@@ -751,6 +751,11 @@ static const struct of_device_id n2rng_match[] = {
                .compatible     = "SUNW,vf-rng",
                .data           = (void *) 1,
        },
+       {
+               .name           = "random-number-generator",
+               .compatible     = "SUNW,kt-rng",
+               .data           = (void *) 1,
+       },
        {},
 };
 MODULE_DEVICE_TABLE(of, n2rng_match);
index 4bea07f..f244ac8 100644 (file)
@@ -68,7 +68,7 @@ struct n2rng {
        struct platform_device  *op;
 
        unsigned long           flags;
-#define N2RNG_FLAG_VF          0x00000001 /* Victoria Falls RNG, else N2 */
+#define N2RNG_FLAG_MULTI       0x00000001 /* Multi-unit capable RNG */
 #define N2RNG_FLAG_CONTROL     0x00000002 /* Operating in control domain */
 #define N2RNG_FLAG_READY       0x00000008 /* Ready for hw-rng layer      */
 #define N2RNG_FLAG_SHUTDOWN    0x00000010 /* Driver unregistering        */
index 7beb0e2..caf8012 100644 (file)
@@ -534,6 +534,7 @@ void tpm_get_timeouts(struct tpm_chip *chip)
        struct duration_t *duration_cap;
        ssize_t rc;
        u32 timeout;
+       unsigned int scale = 1;
 
        tpm_cmd.header.in = tpm_getcap_header;
        tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
@@ -545,24 +546,30 @@ void tpm_get_timeouts(struct tpm_chip *chip)
        if (rc)
                goto duration;
 
-       if (be32_to_cpu(tpm_cmd.header.out.length)
-           != 4 * sizeof(u32))
-               goto duration;
+       if (be32_to_cpu(tpm_cmd.header.out.return_code) != 0 ||
+           be32_to_cpu(tpm_cmd.header.out.length)
+           != sizeof(tpm_cmd.header.out) + sizeof(u32) + 4 * sizeof(u32))
+               return;
 
        timeout_cap = &tpm_cmd.params.getcap_out.cap.timeout;
        /* Don't overwrite default if value is 0 */
        timeout = be32_to_cpu(timeout_cap->a);
+       if (timeout && timeout < 1000) {
+               /* timeouts in msec rather usec */
+               scale = 1000;
+               chip->vendor.timeout_adjusted = true;
+       }
        if (timeout)
-               chip->vendor.timeout_a = usecs_to_jiffies(timeout);
+               chip->vendor.timeout_a = usecs_to_jiffies(timeout * scale);
        timeout = be32_to_cpu(timeout_cap->b);
        if (timeout)
-               chip->vendor.timeout_b = usecs_to_jiffies(timeout);
+               chip->vendor.timeout_b = usecs_to_jiffies(timeout * scale);
        timeout = be32_to_cpu(timeout_cap->c);
        if (timeout)
-               chip->vendor.timeout_c = usecs_to_jiffies(timeout);
+               chip->vendor.timeout_c = usecs_to_jiffies(timeout * scale);
        timeout = be32_to_cpu(timeout_cap->d);
        if (timeout)
-               chip->vendor.timeout_d = usecs_to_jiffies(timeout);
+               chip->vendor.timeout_d = usecs_to_jiffies(timeout * scale);
 
 duration:
        tpm_cmd.header.in = tpm_getcap_header;
@@ -575,23 +582,31 @@ duration:
        if (rc)
                return;
 
-       if (be32_to_cpu(tpm_cmd.header.out.return_code)
-           != 3 * sizeof(u32))
+       if (be32_to_cpu(tpm_cmd.header.out.return_code) != 0 ||
+           be32_to_cpu(tpm_cmd.header.out.length)
+           != sizeof(tpm_cmd.header.out) + sizeof(u32) + 3 * sizeof(u32))
                return;
+
        duration_cap = &tpm_cmd.params.getcap_out.cap.duration;
        chip->vendor.duration[TPM_SHORT] =
            usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_short));
+       chip->vendor.duration[TPM_MEDIUM] =
+           usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_medium));
+       chip->vendor.duration[TPM_LONG] =
+           usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_long));
+
        /* The Broadcom BCM0102 chipset in a Dell Latitude D820 gets the above
         * value wrong and apparently reports msecs rather than usecs. So we
         * fix up the resulting too-small TPM_SHORT value to make things work.
+        * We also scale the TPM_MEDIUM and -_LONG values by 1000.
         */
-       if (chip->vendor.duration[TPM_SHORT] < (HZ/100))
+       if (chip->vendor.duration[TPM_SHORT] < (HZ / 100)) {
                chip->vendor.duration[TPM_SHORT] = HZ;
-
-       chip->vendor.duration[TPM_MEDIUM] =
-           usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_medium));
-       chip->vendor.duration[TPM_LONG] =
-           usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_long));
+               chip->vendor.duration[TPM_MEDIUM] *= 1000;
+               chip->vendor.duration[TPM_LONG] *= 1000;
+               chip->vendor.duration_adjusted = true;
+               dev_info(chip->dev, "Adjusting TPM timeout parameters.");
+       }
 }
 EXPORT_SYMBOL_GPL(tpm_get_timeouts);
 
@@ -600,7 +615,7 @@ void tpm_continue_selftest(struct tpm_chip *chip)
        u8 data[] = {
                0, 193,                 /* TPM_TAG_RQU_COMMAND */
                0, 0, 0, 10,            /* length */
-               0, 0, 0, 83,            /* TPM_ORD_GetCapability */
+               0, 0, 0, 83,            /* TPM_ORD_ContinueSelfTest */
        };
 
        tpm_transmit(chip, data, sizeof(data));
@@ -863,18 +878,24 @@ ssize_t tpm_show_pubek(struct device *dev, struct device_attribute *attr,
        data = tpm_cmd.params.readpubek_out_buffer;
        str +=
            sprintf(str,
-                   "Algorithm: %02X %02X %02X %02X\nEncscheme: %02X %02X\n"
-                   "Sigscheme: %02X %02X\nParameters: %02X %02X %02X %02X"
-                   " %02X %02X %02X %02X %02X %02X %02X %02X\n"
-                   "Modulus length: %d\nModulus: \n",
-                   data[10], data[11], data[12], data[13], data[14],
-                   data[15], data[16], data[17], data[22], data[23],
-                   data[24], data[25], data[26], data[27], data[28],
-                   data[29], data[30], data[31], data[32], data[33],
-                   be32_to_cpu(*((__be32 *) (data + 34))));
+                   "Algorithm: %02X %02X %02X %02X\n"
+                   "Encscheme: %02X %02X\n"
+                   "Sigscheme: %02X %02X\n"
+                   "Parameters: %02X %02X %02X %02X "
+                   "%02X %02X %02X %02X "
+                   "%02X %02X %02X %02X\n"
+                   "Modulus length: %d\n"
+                   "Modulus:\n",
+                   data[0], data[1], data[2], data[3],
+                   data[4], data[5],
+                   data[6], data[7],
+                   data[12], data[13], data[14], data[15],
+                   data[16], data[17], data[18], data[19],
+                   data[20], data[21], data[22], data[23],
+                   be32_to_cpu(*((__be32 *) (data + 24))));
 
        for (i = 0; i < 256; i++) {
-               str += sprintf(str, "%02X ", data[i + 38]);
+               str += sprintf(str, "%02X ", data[i + 28]);
                if ((i + 1) % 16 == 0)
                        str += sprintf(str, "\n");
        }
@@ -937,6 +958,35 @@ ssize_t tpm_show_caps_1_2(struct device * dev,
 }
 EXPORT_SYMBOL_GPL(tpm_show_caps_1_2);
 
+ssize_t tpm_show_durations(struct device *dev, struct device_attribute *attr,
+                         char *buf)
+{
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+
+       return sprintf(buf, "%d %d %d [%s]\n",
+                      jiffies_to_usecs(chip->vendor.duration[TPM_SHORT]),
+                      jiffies_to_usecs(chip->vendor.duration[TPM_MEDIUM]),
+                      jiffies_to_usecs(chip->vendor.duration[TPM_LONG]),
+                      chip->vendor.duration_adjusted
+                      ? "adjusted" : "original");
+}
+EXPORT_SYMBOL_GPL(tpm_show_durations);
+
+ssize_t tpm_show_timeouts(struct device *dev, struct device_attribute *attr,
+                         char *buf)
+{
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+
+       return sprintf(buf, "%d %d %d %d [%s]\n",
+                      jiffies_to_usecs(chip->vendor.timeout_a),
+                      jiffies_to_usecs(chip->vendor.timeout_b),
+                      jiffies_to_usecs(chip->vendor.timeout_c),
+                      jiffies_to_usecs(chip->vendor.timeout_d),
+                      chip->vendor.timeout_adjusted
+                      ? "adjusted" : "original");
+}
+EXPORT_SYMBOL_GPL(tpm_show_timeouts);
+
 ssize_t tpm_store_cancel(struct device *dev, struct device_attribute *attr,
                        const char *buf, size_t count)
 {
index 72ddb03..9c4163c 100644 (file)
@@ -56,6 +56,10 @@ extern ssize_t tpm_show_owned(struct device *, struct device_attribute *attr,
                                char *);
 extern ssize_t tpm_show_temp_deactivated(struct device *,
                                         struct device_attribute *attr, char *);
+extern ssize_t tpm_show_durations(struct device *,
+                                 struct device_attribute *attr, char *);
+extern ssize_t tpm_show_timeouts(struct device *,
+                                struct device_attribute *attr, char *);
 
 struct tpm_chip;
 
@@ -67,6 +71,7 @@ struct tpm_vendor_specific {
        unsigned long base;             /* TPM base address */
 
        int irq;
+       int probed_irq;
 
        int region_size;
        int have_region;
@@ -81,7 +86,9 @@ struct tpm_vendor_specific {
        struct list_head list;
        int locality;
        unsigned long timeout_a, timeout_b, timeout_c, timeout_d; /* jiffies */
+       bool timeout_adjusted;
        unsigned long duration[3]; /* jiffies */
+       bool duration_adjusted;
 
        wait_queue_head_t read_queue;
        wait_queue_head_t int_queue;
index a605cb7..82facc9 100644 (file)
@@ -330,12 +330,12 @@ static int __init init_nsc(void)
        pdev->dev.driver = &nsc_drv.driver;
        pdev->dev.release = tpm_nsc_remove;
 
-       if ((rc = platform_device_register(pdev)) < 0)
-               goto err_free_dev;
+       if ((rc = platform_device_add(pdev)) < 0)
+               goto err_put_dev;
 
        if (request_region(base, 2, "tpm_nsc0") == NULL ) {
                rc = -EBUSY;
-               goto err_unreg_dev;
+               goto err_del_dev;
        }
 
        if (!(chip = tpm_register_hardware(&pdev->dev, &tpm_nsc))) {
@@ -382,10 +382,10 @@ static int __init init_nsc(void)
 
 err_rel_reg:
        release_region(base, 2);
-err_unreg_dev:
-       platform_device_unregister(pdev);
-err_free_dev:
-       kfree(pdev);
+err_del_dev:
+       platform_device_del(pdev);
+err_put_dev:
+       platform_device_put(pdev);
 err_unreg_drv:
        platform_driver_unregister(&nsc_drv);
        return rc;
index dd21df5..7fc2f10 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/interrupt.h>
 #include <linux/wait.h>
 #include <linux/acpi.h>
+#include <linux/freezer.h>
 #include "tpm.h"
 
 #define TPM_HEADER_SIZE 10
@@ -79,7 +80,7 @@ enum tis_defaults {
 static LIST_HEAD(tis_chips);
 static DEFINE_SPINLOCK(tis_lock);
 
-#ifdef CONFIG_ACPI
+#ifdef CONFIG_PNP
 static int is_itpm(struct pnp_dev *dev)
 {
        struct acpi_device *acpi = pnp_acpi_device(dev);
@@ -92,11 +93,6 @@ static int is_itpm(struct pnp_dev *dev)
 
        return 0;
 }
-#else
-static int is_itpm(struct pnp_dev *dev)
-{
-       return 0;
-}
 #endif
 
 static int check_locality(struct tpm_chip *chip, int l)
@@ -120,7 +116,7 @@ static void release_locality(struct tpm_chip *chip, int l, int force)
 
 static int request_locality(struct tpm_chip *chip, int l)
 {
-       unsigned long stop;
+       unsigned long stop, timeout;
        long rc;
 
        if (check_locality(chip, l) >= 0)
@@ -129,17 +125,25 @@ static int request_locality(struct tpm_chip *chip, int l)
        iowrite8(TPM_ACCESS_REQUEST_USE,
                 chip->vendor.iobase + TPM_ACCESS(l));
 
+       stop = jiffies + chip->vendor.timeout_a;
+
        if (chip->vendor.irq) {
+again:
+               timeout = stop - jiffies;
+               if ((long)timeout <= 0)
+                       return -1;
                rc = wait_event_interruptible_timeout(chip->vendor.int_queue,
                                                      (check_locality
                                                       (chip, l) >= 0),
-                                                     chip->vendor.timeout_a);
+                                                     timeout);
                if (rc > 0)
                        return l;
-
+               if (rc == -ERESTARTSYS && freezing(current)) {
+                       clear_thread_flag(TIF_SIGPENDING);
+                       goto again;
+               }
        } else {
                /* wait for burstcount */
-               stop = jiffies + chip->vendor.timeout_a;
                do {
                        if (check_locality(chip, l) >= 0)
                                return l;
@@ -196,15 +200,24 @@ static int wait_for_stat(struct tpm_chip *chip, u8 mask, unsigned long timeout,
        if ((status & mask) == mask)
                return 0;
 
+       stop = jiffies + timeout;
+
        if (chip->vendor.irq) {
+again:
+               timeout = stop - jiffies;
+               if ((long)timeout <= 0)
+                       return -ETIME;
                rc = wait_event_interruptible_timeout(*queue,
                                                      ((tpm_tis_status
                                                        (chip) & mask) ==
                                                       mask), timeout);
                if (rc > 0)
                        return 0;
+               if (rc == -ERESTARTSYS && freezing(current)) {
+                       clear_thread_flag(TIF_SIGPENDING);
+                       goto again;
+               }
        } else {
-               stop = jiffies + timeout;
                do {
                        msleep(TPM_TIMEOUT);
                        status = tpm_tis_status(chip);
@@ -288,11 +301,10 @@ MODULE_PARM_DESC(itpm, "Force iTPM workarounds (found on some Lenovo laptops)");
  * tpm.c can skip polling for the data to be available as the interrupt is
  * waited for here
  */
-static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len)
+static int tpm_tis_send_data(struct tpm_chip *chip, u8 *buf, size_t len)
 {
        int rc, status, burstcnt;
        size_t count = 0;
-       u32 ordinal;
 
        if (request_locality(chip, 0) < 0)
                return -EBUSY;
@@ -327,8 +339,7 @@ static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len)
 
        /* write last byte */
        iowrite8(buf[count],
-                chip->vendor.iobase +
-                TPM_DATA_FIFO(chip->vendor.locality));
+                chip->vendor.iobase + TPM_DATA_FIFO(chip->vendor.locality));
        wait_for_stat(chip, TPM_STS_VALID, chip->vendor.timeout_c,
                      &chip->vendor.int_queue);
        status = tpm_tis_status(chip);
@@ -337,6 +348,28 @@ static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len)
                goto out_err;
        }
 
+       return 0;
+
+out_err:
+       tpm_tis_ready(chip);
+       release_locality(chip, chip->vendor.locality, 0);
+       return rc;
+}
+
+/*
+ * If interrupts are used (signaled by an irq set in the vendor structure)
+ * tpm.c can skip polling for the data to be available as the interrupt is
+ * waited for here
+ */
+static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len)
+{
+       int rc;
+       u32 ordinal;
+
+       rc = tpm_tis_send_data(chip, buf, len);
+       if (rc < 0)
+               return rc;
+
        /* go and do it */
        iowrite8(TPM_STS_GO,
                 chip->vendor.iobase + TPM_STS(chip->vendor.locality));
@@ -358,6 +391,47 @@ out_err:
        return rc;
 }
 
+/*
+ * Early probing for iTPM with STS_DATA_EXPECT flaw.
+ * Try sending command without itpm flag set and if that
+ * fails, repeat with itpm flag set.
+ */
+static int probe_itpm(struct tpm_chip *chip)
+{
+       int rc = 0;
+       u8 cmd_getticks[] = {
+               0x00, 0xc1, 0x00, 0x00, 0x00, 0x0a,
+               0x00, 0x00, 0x00, 0xf1
+       };
+       size_t len = sizeof(cmd_getticks);
+       int rem_itpm = itpm;
+
+       itpm = 0;
+
+       rc = tpm_tis_send_data(chip, cmd_getticks, len);
+       if (rc == 0)
+               goto out;
+
+       tpm_tis_ready(chip);
+       release_locality(chip, chip->vendor.locality, 0);
+
+       itpm = 1;
+
+       rc = tpm_tis_send_data(chip, cmd_getticks, len);
+       if (rc == 0) {
+               dev_info(chip->dev, "Detected an iTPM.\n");
+               rc = 1;
+       } else
+               rc = -EFAULT;
+
+out:
+       itpm = rem_itpm;
+       tpm_tis_ready(chip);
+       release_locality(chip, chip->vendor.locality, 0);
+
+       return rc;
+}
+
 static const struct file_operations tis_ops = {
        .owner = THIS_MODULE,
        .llseek = no_llseek,
@@ -376,6 +450,8 @@ static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated,
                   NULL);
 static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL);
 static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
+static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
+static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
 
 static struct attribute *tis_attrs[] = {
        &dev_attr_pubek.attr,
@@ -385,7 +461,9 @@ static struct attribute *tis_attrs[] = {
        &dev_attr_owned.attr,
        &dev_attr_temp_deactivated.attr,
        &dev_attr_caps.attr,
-       &dev_attr_cancel.attr, NULL,
+       &dev_attr_cancel.attr,
+       &dev_attr_durations.attr,
+       &dev_attr_timeouts.attr, NULL,
 };
 
 static struct attribute_group tis_attr_grp = {
@@ -416,7 +494,7 @@ static irqreturn_t tis_int_probe(int irq, void *dev_id)
        if (interrupt == 0)
                return IRQ_NONE;
 
-       chip->vendor.irq = irq;
+       chip->vendor.probed_irq = irq;
 
        /* Clear interrupts handled with TPM_EOI */
        iowrite32(interrupt,
@@ -464,7 +542,7 @@ static int tpm_tis_init(struct device *dev, resource_size_t start,
                        resource_size_t len, unsigned int irq)
 {
        u32 vendor, intfcaps, intmask;
-       int rc, i;
+       int rc, i, irq_s, irq_e;
        struct tpm_chip *chip;
 
        if (!(chip = tpm_register_hardware(dev, &tpm_tis)))
@@ -493,6 +571,14 @@ static int tpm_tis_init(struct device *dev, resource_size_t start,
                 "1.2 TPM (device-id 0x%X, rev-id %d)\n",
                 vendor >> 16, ioread8(chip->vendor.iobase + TPM_RID(0)));
 
+       if (!itpm) {
+               itpm = probe_itpm(chip);
+               if (itpm < 0) {
+                       rc = -ENODEV;
+                       goto out_err;
+               }
+       }
+
        if (itpm)
                dev_info(dev, "Intel iTPM workaround enabled\n");
 
@@ -522,6 +608,9 @@ static int tpm_tis_init(struct device *dev, resource_size_t start,
        if (intfcaps & TPM_INTF_DATA_AVAIL_INT)
                dev_dbg(dev, "\tData Avail Int Support\n");
 
+       /* get the timeouts before testing for irqs */
+       tpm_get_timeouts(chip);
+
        /* INTERRUPT Setup */
        init_waitqueue_head(&chip->vendor.read_queue);
        init_waitqueue_head(&chip->vendor.int_queue);
@@ -540,13 +629,19 @@ static int tpm_tis_init(struct device *dev, resource_size_t start,
        if (interrupts)
                chip->vendor.irq = irq;
        if (interrupts && !chip->vendor.irq) {
-               chip->vendor.irq =
+               irq_s =
                    ioread8(chip->vendor.iobase +
                            TPM_INT_VECTOR(chip->vendor.locality));
+               if (irq_s) {
+                       irq_e = irq_s;
+               } else {
+                       irq_s = 3;
+                       irq_e = 15;
+               }
 
-               for (i = 3; i < 16 && chip->vendor.irq == 0; i++) {
+               for (i = irq_s; i <= irq_e && chip->vendor.irq == 0; i++) {
                        iowrite8(i, chip->vendor.iobase +
-                                   TPM_INT_VECTOR(chip->vendor.locality));
+                                TPM_INT_VECTOR(chip->vendor.locality));
                        if (request_irq
                            (i, tis_int_probe, IRQF_SHARED,
                             chip->vendor.miscdev.name, chip) != 0) {
@@ -568,9 +663,22 @@ static int tpm_tis_init(struct device *dev, resource_size_t start,
                                  chip->vendor.iobase +
                                  TPM_INT_ENABLE(chip->vendor.locality));
 
+                       chip->vendor.probed_irq = 0;
+
                        /* Generate Interrupts */
                        tpm_gen_interrupt(chip);
 
+                       chip->vendor.irq = chip->vendor.probed_irq;
+
+                       /* free_irq will call into tis_int_probe;
+                          clear all irqs we haven't seen while doing
+                          tpm_gen_interrupt */
+                       iowrite32(ioread32
+                                 (chip->vendor.iobase +
+                                  TPM_INT_STATUS(chip->vendor.locality)),
+                                 chip->vendor.iobase +
+                                 TPM_INT_STATUS(chip->vendor.locality));
+
                        /* Turn off */
                        iowrite32(intmask,
                                  chip->vendor.iobase +
@@ -609,7 +717,6 @@ static int tpm_tis_init(struct device *dev, resource_size_t start,
        list_add(&chip->vendor.list, &tis_chips);
        spin_unlock(&tis_lock);
 
-       tpm_get_timeouts(chip);
        tpm_continue_selftest(chip);
 
        return 0;
@@ -619,6 +726,29 @@ out_err:
        tpm_remove_hardware(chip->dev);
        return rc;
 }
+
+static void tpm_tis_reenable_interrupts(struct tpm_chip *chip)
+{
+       u32 intmask;
+
+       /* reenable interrupts that device may have lost or
+          BIOS/firmware may have disabled */
+       iowrite8(chip->vendor.irq, chip->vendor.iobase +
+                TPM_INT_VECTOR(chip->vendor.locality));
+
+       intmask =
+           ioread32(chip->vendor.iobase +
+                    TPM_INT_ENABLE(chip->vendor.locality));
+
+       intmask |= TPM_INTF_CMD_READY_INT
+           | TPM_INTF_LOCALITY_CHANGE_INT | TPM_INTF_DATA_AVAIL_INT
+           | TPM_INTF_STS_VALID_INT | TPM_GLOBAL_INT_ENABLE;
+
+       iowrite32(intmask,
+                 chip->vendor.iobase + TPM_INT_ENABLE(chip->vendor.locality));
+}
+
+
 #ifdef CONFIG_PNP
 static int __devinit tpm_tis_pnp_init(struct pnp_dev *pnp_dev,
                                      const struct pnp_device_id *pnp_id)
@@ -650,6 +780,9 @@ static int tpm_tis_pnp_resume(struct pnp_dev *dev)
        struct tpm_chip *chip = pnp_get_drvdata(dev);
        int ret;
 
+       if (chip->vendor.irq)
+               tpm_tis_reenable_interrupts(chip);
+
        ret = tpm_pm_resume(&dev->dev);
        if (!ret)
                tpm_continue_selftest(chip);
@@ -702,6 +835,11 @@ static int tpm_tis_suspend(struct platform_device *dev, pm_message_t msg)
 
 static int tpm_tis_resume(struct platform_device *dev)
 {
+       struct tpm_chip *chip = dev_get_drvdata(&dev->dev);
+
+       if (chip->vendor.irq)
+               tpm_tis_reenable_interrupts(chip);
+
        return tpm_pm_resume(&dev->dev);
 }
 static struct platform_driver tis_drv = {
index 2e5b204..d0183dd 100644 (file)
@@ -1,6 +1,6 @@
 /* n2_core.c: Niagara2 Stream Processing Unit (SPU) crypto support.
  *
- * Copyright (C) 2010 David S. Miller <davem@davemloft.net>
+ * Copyright (C) 2010, 2011 David S. Miller <davem@davemloft.net>
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -31,8 +31,8 @@
 #include "n2_core.h"
 
 #define DRV_MODULE_NAME                "n2_crypto"
-#define DRV_MODULE_VERSION     "0.1"
-#define DRV_MODULE_RELDATE     "April 29, 2010"
+#define DRV_MODULE_VERSION     "0.2"
+#define DRV_MODULE_RELDATE     "July 28, 2011"
 
 static char version[] __devinitdata =
        DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
@@ -1823,22 +1823,17 @@ static int spu_mdesc_scan(struct mdesc_handle *mdesc, struct platform_device *de
 static int __devinit get_irq_props(struct mdesc_handle *mdesc, u64 node,
                                   struct spu_mdesc_info *ip)
 {
-       const u64 *intr, *ino;
-       int intr_len, ino_len;
+       const u64 *ino;
+       int ino_len;
        int i;
 
-       intr = mdesc_get_property(mdesc, node, "intr", &intr_len);
-       if (!intr)
-               return -ENODEV;
-
        ino = mdesc_get_property(mdesc, node, "ino", &ino_len);
-       if (!ino)
+       if (!ino) {
+               printk("NO 'ino'\n");
                return -ENODEV;
+       }
 
-       if (intr_len != ino_len)
-               return -EINVAL;
-
-       ip->num_intrs = intr_len / sizeof(u64);
+       ip->num_intrs = ino_len / sizeof(u64);
        ip->ino_table = kzalloc((sizeof(struct ino_blob) *
                                 ip->num_intrs),
                                GFP_KERNEL);
@@ -1847,7 +1842,7 @@ static int __devinit get_irq_props(struct mdesc_handle *mdesc, u64 node,
 
        for (i = 0; i < ip->num_intrs; i++) {
                struct ino_blob *b = &ip->ino_table[i];
-               b->intr = intr[i];
+               b->intr = i + 1;
                b->ino = ino[i];
        }
 
@@ -2204,6 +2199,10 @@ static struct of_device_id n2_crypto_match[] = {
                .name = "n2cp",
                .compatible = "SUNW,vf-cwq",
        },
+       {
+               .name = "n2cp",
+               .compatible = "SUNW,kt-cwq",
+       },
        {},
 };
 
@@ -2228,6 +2227,10 @@ static struct of_device_id n2_mau_match[] = {
                .name = "ncp",
                .compatible = "SUNW,vf-mau",
        },
+       {
+               .name = "ncp",
+               .compatible = "SUNW,kt-mau",
+       },
        {},
 };
 
index 95a08a8..5745b7f 100644 (file)
@@ -271,7 +271,7 @@ int iser_send_command(struct iscsi_conn *conn,
        unsigned long edtl;
        int err;
        struct iser_data_buf *data_buf;
-       struct iscsi_cmd *hdr =  (struct iscsi_cmd *)task->hdr;
+       struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
        struct scsi_cmnd *sc  =  task->sc;
        struct iser_tx_desc *tx_desc = &iser_task->desc;
 
index 56abf3d..d728875 100644 (file)
@@ -154,10 +154,13 @@ static const struct xpad_device {
        { 0x0f30, 0x8888, "BigBen XBMiniPad Controller", 0, XTYPE_XBOX },
        { 0x102c, 0xff0c, "Joytech Wireless Advanced Controller", 0, XTYPE_XBOX },
        { 0x12ab, 0x8809, "Xbox DDR dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX },
+       { 0x12ab, 0x0004, "Honey Bee Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
+       { 0x0e6f, 0x0105, "HSM3 Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
        { 0x1430, 0x4748, "RedOctane Guitar Hero X-plorer", 0, XTYPE_XBOX360 },
        { 0x1430, 0x8888, "TX6500+ Dance Pad (first generation)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX },
        { 0x146b, 0x0601, "BigBen Interactive XBOX 360 Controller", 0, XTYPE_XBOX360 },
        { 0x045e, 0x028e, "Microsoft X-Box 360 pad", 0, XTYPE_XBOX360 },
+       { 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 },
        { 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
        { 0x0f0d, 0x0016, "Hori Real Arcade Pro.EX", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
        { 0x0f0d, 0x000d, "Hori Fighting Stick EX2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
@@ -236,9 +239,10 @@ static struct usb_device_id xpad_table [] = {
        XPAD_XBOX360_VENDOR(0x046d),            /* Logitech X-Box 360 style controllers */
        XPAD_XBOX360_VENDOR(0x0738),            /* Mad Catz X-Box 360 controllers */
        XPAD_XBOX360_VENDOR(0x0e6f),            /* 0x0e6f X-Box 360 controllers */
+       XPAD_XBOX360_VENDOR(0x12ab),            /* X-Box 360 dance pads */
        XPAD_XBOX360_VENDOR(0x1430),            /* RedOctane X-Box 360 controllers */
        XPAD_XBOX360_VENDOR(0x146b),            /* BigBen Interactive Controllers */
-       XPAD_XBOX360_VENDOR(0x1bad),            /* Rock Band Drums */
+       XPAD_XBOX360_VENDOR(0x1bad),            /* Harminix Rock Band Guitar and Drums */
        XPAD_XBOX360_VENDOR(0x0f0d),            /* Hori Controllers */
        { }
 };
@@ -545,7 +549,7 @@ static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad)
        struct usb_endpoint_descriptor *ep_irq_out;
        int error;
 
-       if (xpad->xtype != XTYPE_XBOX360 && xpad->xtype != XTYPE_XBOX)
+       if (xpad->xtype == XTYPE_UNKNOWN)
                return 0;
 
        xpad->odata = usb_alloc_coherent(xpad->udev, XPAD_PKT_LEN,
@@ -579,13 +583,13 @@ static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad)
 
 static void xpad_stop_output(struct usb_xpad *xpad)
 {
-       if (xpad->xtype == XTYPE_XBOX360 || xpad->xtype == XTYPE_XBOX)
+       if (xpad->xtype != XTYPE_UNKNOWN)
                usb_kill_urb(xpad->irq_out);
 }
 
 static void xpad_deinit_output(struct usb_xpad *xpad)
 {
-       if (xpad->xtype == XTYPE_XBOX360 || xpad->xtype == XTYPE_XBOX) {
+       if (xpad->xtype != XTYPE_UNKNOWN) {
                usb_free_urb(xpad->irq_out);
                usb_free_coherent(xpad->udev, XPAD_PKT_LEN,
                                xpad->odata, xpad->odata_dma);
@@ -632,6 +636,23 @@ static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect
 
                        return usb_submit_urb(xpad->irq_out, GFP_ATOMIC);
 
+               case XTYPE_XBOX360W:
+                       xpad->odata[0] = 0x00;
+                       xpad->odata[1] = 0x01;
+                       xpad->odata[2] = 0x0F;
+                       xpad->odata[3] = 0xC0;
+                       xpad->odata[4] = 0x00;
+                       xpad->odata[5] = strong / 256;
+                       xpad->odata[6] = weak / 256;
+                       xpad->odata[7] = 0x00;
+                       xpad->odata[8] = 0x00;
+                       xpad->odata[9] = 0x00;
+                       xpad->odata[10] = 0x00;
+                       xpad->odata[11] = 0x00;
+                       xpad->irq_out->transfer_buffer_length = 12;
+
+                       return usb_submit_urb(xpad->irq_out, GFP_ATOMIC);
+
                default:
                        dbg("%s - rumble command sent to unsupported xpad type: %d",
                                __func__, xpad->xtype);
@@ -644,7 +665,7 @@ static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect
 
 static int xpad_init_ff(struct usb_xpad *xpad)
 {
-       if (xpad->xtype != XTYPE_XBOX360 && xpad->xtype != XTYPE_XBOX)
+       if (xpad->xtype == XTYPE_UNKNOWN)
                return 0;
 
        input_set_capability(xpad->dev, EV_FF, FF_RUMBLE);
index af45d27..7b404e5 100644 (file)
@@ -9,7 +9,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
index 6315986..c770826 100644 (file)
@@ -8,7 +8,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
index 11478eb..19cfc0c 100644 (file)
@@ -1578,14 +1578,14 @@ static int __init atkbd_setup_forced_release(const struct dmi_system_id *id)
        atkbd_platform_fixup = atkbd_apply_forced_release_keylist;
        atkbd_platform_fixup_data = id->driver_data;
 
-       return 0;
+       return 1;
 }
 
 static int __init atkbd_setup_scancode_fixup(const struct dmi_system_id *id)
 {
        atkbd_platform_scancode_fixup = id->driver_data;
 
-       return 0;
+       return 1;
 }
 
 static const struct dmi_system_id atkbd_dmi_quirk_table[] __initconst = {
index 6e6145b..ce281d1 100644 (file)
@@ -2,6 +2,7 @@
  * Driver for keys on GPIO lines capable of generating interrupts.
  *
  * Copyright 2005 Phil Blundell
+ * Copyright 2010, 2011 David Jander <david@protonic.nl>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -25,6 +26,8 @@
 #include <linux/gpio_keys.h>
 #include <linux/workqueue.h>
 #include <linux/gpio.h>
+#include <linux/of_platform.h>
+#include <linux/of_gpio.h>
 
 struct gpio_button_data {
        struct gpio_keys_button *button;
@@ -415,7 +418,7 @@ static int __devinit gpio_keys_setup_key(struct platform_device *pdev,
        if (!button->can_disable)
                irqflags |= IRQF_SHARED;
 
-       error = request_any_context_irq(irq, gpio_keys_isr, irqflags, desc, bdata);
+       error = request_threaded_irq(irq, NULL, gpio_keys_isr, irqflags, desc, bdata);
        if (error < 0) {
                dev_err(dev, "Unable to claim irq %d; error %d\n",
                        irq, error);
@@ -445,15 +448,120 @@ static void gpio_keys_close(struct input_dev *input)
                ddata->disable(input->dev.parent);
 }
 
+/*
+ * Handlers for alternative sources of platform_data
+ */
+#ifdef CONFIG_OF
+/*
+ * Translate OpenFirmware node properties into platform_data
+ */
+static int gpio_keys_get_devtree_pdata(struct device *dev,
+                           struct gpio_keys_platform_data *pdata)
+{
+       struct device_node *node, *pp;
+       int i;
+       struct gpio_keys_button *buttons;
+       const u32 *reg;
+       int len;
+
+       node = dev->of_node;
+       if (node == NULL)
+               return -ENODEV;
+
+       memset(pdata, 0, sizeof *pdata);
+
+       pdata->rep = !!of_get_property(node, "autorepeat", &len);
+
+       /* First count the subnodes */
+       pdata->nbuttons = 0;
+       pp = NULL;
+       while ((pp = of_get_next_child(node, pp)))
+               pdata->nbuttons++;
+
+       if (pdata->nbuttons == 0)
+               return -ENODEV;
+
+       buttons = kzalloc(pdata->nbuttons * (sizeof *buttons), GFP_KERNEL);
+       if (!buttons)
+               return -ENODEV;
+
+       pp = NULL;
+       i = 0;
+       while ((pp = of_get_next_child(node, pp))) {
+               enum of_gpio_flags flags;
+
+               if (!of_find_property(pp, "gpios", NULL)) {
+                       pdata->nbuttons--;
+                       dev_warn(dev, "Found button without gpios\n");
+                       continue;
+               }
+               buttons[i].gpio = of_get_gpio_flags(pp, 0, &flags);
+               buttons[i].active_low = flags & OF_GPIO_ACTIVE_LOW;
+
+               reg = of_get_property(pp, "linux,code", &len);
+               if (!reg) {
+                       dev_err(dev, "Button without keycode: 0x%x\n", buttons[i].gpio);
+                       goto out_fail;
+               }
+               buttons[i].code = be32_to_cpup(reg);
+
+               buttons[i].desc = of_get_property(pp, "label", &len);
+
+               reg = of_get_property(pp, "linux,input-type", &len);
+               buttons[i].type = reg ? be32_to_cpup(reg) : EV_KEY;
+
+               buttons[i].wakeup = !!of_get_property(pp, "gpio-key,wakeup", NULL);
+
+               reg = of_get_property(pp, "debounce-interval", &len);
+               buttons[i].debounce_interval = reg ? be32_to_cpup(reg) : 5;
+
+               i++;
+       }
+
+       pdata->buttons = buttons;
+
+       return 0;
+
+out_fail:
+       kfree(buttons);
+       return -ENODEV;
+}
+
+static struct of_device_id gpio_keys_of_match[] = {
+       { .compatible = "gpio-keys", },
+       { },
+};
+MODULE_DEVICE_TABLE(of, gpio_keys_of_match);
+
+#else
+
+static int gpio_keys_get_devtree_pdata(struct device *dev,
+                           struct gpio_keys_platform_data *altp)
+{
+       return -ENODEV;
+}
+
+#define gpio_keys_of_match NULL
+
+#endif
+
 static int __devinit gpio_keys_probe(struct platform_device *pdev)
 {
        struct gpio_keys_platform_data *pdata = pdev->dev.platform_data;
        struct gpio_keys_drvdata *ddata;
        struct device *dev = &pdev->dev;
+       struct gpio_keys_platform_data alt_pdata;
        struct input_dev *input;
        int i, error;
        int wakeup = 0;
 
+       if (!pdata) {
+               error = gpio_keys_get_devtree_pdata(dev, &alt_pdata);
+               if (error)
+                       return error;
+               pdata = &alt_pdata;
+       }
+
        ddata = kzalloc(sizeof(struct gpio_keys_drvdata) +
                        pdata->nbuttons * sizeof(struct gpio_button_data),
                        GFP_KERNEL);
@@ -544,13 +652,15 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev)
  fail1:
        input_free_device(input);
        kfree(ddata);
+       /* If we have no platform_data, we allocated buttons dynamically. */
+       if (!pdev->dev.platform_data)
+               kfree(pdata->buttons);
 
        return error;
 }
 
 static int __devexit gpio_keys_remove(struct platform_device *pdev)
 {
-       struct gpio_keys_platform_data *pdata = pdev->dev.platform_data;
        struct gpio_keys_drvdata *ddata = platform_get_drvdata(pdev);
        struct input_dev *input = ddata->input;
        int i;
@@ -559,31 +669,39 @@ static int __devexit gpio_keys_remove(struct platform_device *pdev)
 
        device_init_wakeup(&pdev->dev, 0);
 
-       for (i = 0; i < pdata->nbuttons; i++) {
-               int irq = gpio_to_irq(pdata->buttons[i].gpio);
+       for (i = 0; i < ddata->n_buttons; i++) {
+               int irq = gpio_to_irq(ddata->data[i].button->gpio);
                free_irq(irq, &ddata->data[i]);
                if (ddata->data[i].timer_debounce)
                        del_timer_sync(&ddata->data[i].timer);
                cancel_work_sync(&ddata->data[i].work);
-               gpio_free(pdata->buttons[i].gpio);
+               gpio_free(ddata->data[i].button->gpio);
        }
 
        input_unregister_device(input);
 
+       /*
+        * If we had no platform_data, we allocated buttons dynamically, and
+        * must free them here. ddata->data[0].button is the pointer to the
+        * beginning of the allocated array.
+        */
+       if (!pdev->dev.platform_data)
+               kfree(ddata->data[0].button);
+
+       kfree(ddata);
+
        return 0;
 }
 
-
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int gpio_keys_suspend(struct device *dev)
 {
-       struct platform_device *pdev = to_platform_device(dev);
-       struct gpio_keys_platform_data *pdata = pdev->dev.platform_data;
+       struct gpio_keys_drvdata *ddata = dev_get_drvdata(dev);
        int i;
 
-       if (device_may_wakeup(&pdev->dev)) {
-               for (i = 0; i < pdata->nbuttons; i++) {
-                       struct gpio_keys_button *button = &pdata->buttons[i];
+       if (device_may_wakeup(dev)) {
+               for (i = 0; i < ddata->n_buttons; i++) {
+                       struct gpio_keys_button *button = ddata->data[i].button;
                        if (button->wakeup) {
                                int irq = gpio_to_irq(button->gpio);
                                enable_irq_wake(irq);
@@ -596,15 +714,13 @@ static int gpio_keys_suspend(struct device *dev)
 
 static int gpio_keys_resume(struct device *dev)
 {
-       struct platform_device *pdev = to_platform_device(dev);
-       struct gpio_keys_drvdata *ddata = platform_get_drvdata(pdev);
-       struct gpio_keys_platform_data *pdata = pdev->dev.platform_data;
+       struct gpio_keys_drvdata *ddata = dev_get_drvdata(dev);
        int i;
 
-       for (i = 0; i < pdata->nbuttons; i++) {
+       for (i = 0; i < ddata->n_buttons; i++) {
 
-               struct gpio_keys_button *button = &pdata->buttons[i];
-               if (button->wakeup && device_may_wakeup(&pdev->dev)) {
+               struct gpio_keys_button *button = ddata->data[i].button;
+               if (button->wakeup && device_may_wakeup(dev)) {
                        int irq = gpio_to_irq(button->gpio);
                        disable_irq_wake(irq);
                }
@@ -615,22 +731,18 @@ static int gpio_keys_resume(struct device *dev)
 
        return 0;
 }
-
-static const struct dev_pm_ops gpio_keys_pm_ops = {
-       .suspend        = gpio_keys_suspend,
-       .resume         = gpio_keys_resume,
-};
 #endif
 
+static SIMPLE_DEV_PM_OPS(gpio_keys_pm_ops, gpio_keys_suspend, gpio_keys_resume);
+
 static struct platform_driver gpio_keys_device_driver = {
        .probe          = gpio_keys_probe,
        .remove         = __devexit_p(gpio_keys_remove),
        .driver         = {
                .name   = "gpio-keys",
                .owner  = THIS_MODULE,
-#ifdef CONFIG_PM
                .pm     = &gpio_keys_pm_ops,
-#endif
+               .of_match_table = gpio_keys_of_match,
        }
 };
 
@@ -644,10 +756,10 @@ static void __exit gpio_keys_exit(void)
        platform_driver_unregister(&gpio_keys_device_driver);
 }
 
-module_init(gpio_keys_init);
+late_initcall(gpio_keys_init);
 module_exit(gpio_keys_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Phil Blundell <pb@handhelds.org>");
-MODULE_DESCRIPTION("Keyboard driver for CPU GPIOs");
+MODULE_DESCRIPTION("Keyboard driver for GPIOs");
 MODULE_ALIAS("platform:gpio-keys");
index 71f744a..ab0acaf 100644 (file)
@@ -146,7 +146,6 @@ struct lm8323_chip {
        /* device lock */
        struct mutex            lock;
        struct i2c_client       *client;
-       struct work_struct      work;
        struct input_dev        *idev;
        bool                    kp_enabled;
        bool                    pm_suspend;
@@ -162,7 +161,6 @@ struct lm8323_chip {
 
 #define client_to_lm8323(c)    container_of(c, struct lm8323_chip, client)
 #define dev_to_lm8323(d)       container_of(d, struct lm8323_chip, client->dev)
-#define work_to_lm8323(w)      container_of(w, struct lm8323_chip, work)
 #define cdev_to_pwm(c)         container_of(c, struct lm8323_pwm, cdev)
 #define work_to_pwm(w)         container_of(w, struct lm8323_pwm, work)
 
@@ -375,9 +373,9 @@ static void pwm_done(struct lm8323_pwm *pwm)
  * Bottom half: handle the interrupt by posting key events, or dealing with
  * errors appropriately.
  */
-static void lm8323_work(struct work_struct *work)
+static irqreturn_t lm8323_irq(int irq, void *_lm)
 {
-       struct lm8323_chip *lm = work_to_lm8323(work);
+       struct lm8323_chip *lm = _lm;
        u8 ints;
        int i;
 
@@ -409,16 +407,6 @@ static void lm8323_work(struct work_struct *work)
        }
 
        mutex_unlock(&lm->lock);
-}
-
-/*
- * We cannot use I2C in interrupt context, so we just schedule work.
- */
-static irqreturn_t lm8323_irq(int irq, void *data)
-{
-       struct lm8323_chip *lm = data;
-
-       schedule_work(&lm->work);
 
        return IRQ_HANDLED;
 }
@@ -675,7 +663,6 @@ static int __devinit lm8323_probe(struct i2c_client *client,
        lm->client = client;
        lm->idev = idev;
        mutex_init(&lm->lock);
-       INIT_WORK(&lm->work, lm8323_work);
 
        lm->size_x = pdata->size_x;
        lm->size_y = pdata->size_y;
@@ -746,9 +733,8 @@ static int __devinit lm8323_probe(struct i2c_client *client,
                goto fail3;
        }
 
-       err = request_irq(client->irq, lm8323_irq,
-                         IRQF_TRIGGER_FALLING | IRQF_DISABLED,
-                         "lm8323", lm);
+       err = request_threaded_irq(client->irq, NULL, lm8323_irq,
+                         IRQF_TRIGGER_LOW|IRQF_ONESHOT, "lm8323", lm);
        if (err) {
                dev_err(&client->dev, "could not get IRQ %d\n", client->irq);
                goto fail4;
@@ -783,7 +769,6 @@ static int __devexit lm8323_remove(struct i2c_client *client)
 
        disable_irq_wake(client->irq);
        free_irq(client->irq, lm);
-       cancel_work_sync(&lm->work);
 
        input_unregister_device(lm->idev);
 
index 0a9e811..1c1615d 100644 (file)
  * enabled capacitance sensing inputs and its run/suspend mode.
  */
 #define ELECTRODE_CONF_ADDR            0x5e
+#define ELECTRODE_CONF_QUICK_CHARGE    0x80
 #define AUTO_CONFIG_CTRL_ADDR          0x7b
 #define AUTO_CONFIG_USL_ADDR           0x7d
 #define AUTO_CONFIG_LSL_ADDR           0x7e
 #define AUTO_CONFIG_TL_ADDR            0x7f
 
 /* Threshold of touch/release trigger */
-#define TOUCH_THRESHOLD                        0x0f
-#define RELEASE_THRESHOLD              0x0a
+#define TOUCH_THRESHOLD                        0x08
+#define RELEASE_THRESHOLD              0x05
 /* Masks for touch and release triggers */
 #define TOUCH_STATUS_MASK              0xfff
 /* MPR121 has 12 keys */
@@ -127,7 +128,7 @@ static int __devinit mpr121_phys_init(const struct mpr121_platform_data *pdata,
                                      struct i2c_client *client)
 {
        const struct mpr121_init_register *reg;
-       unsigned char usl, lsl, tl;
+       unsigned char usl, lsl, tl, eleconf;
        int i, t, vdd, ret;
 
        /* Set up touch/release threshold for ele0-ele11 */
@@ -163,8 +164,15 @@ static int __devinit mpr121_phys_init(const struct mpr121_platform_data *pdata,
        ret = i2c_smbus_write_byte_data(client, AUTO_CONFIG_USL_ADDR, usl);
        ret |= i2c_smbus_write_byte_data(client, AUTO_CONFIG_LSL_ADDR, lsl);
        ret |= i2c_smbus_write_byte_data(client, AUTO_CONFIG_TL_ADDR, tl);
+
+       /*
+        * Quick charge bit will let the capacitive charge to ready
+        * state quickly, or the buttons may not function after system
+        * boot.
+        */
+       eleconf = mpr121->keycount | ELECTRODE_CONF_QUICK_CHARGE;
        ret |= i2c_smbus_write_byte_data(client, ELECTRODE_CONF_ADDR,
-                                        mpr121->keycount);
+                                        eleconf);
        if (ret != 0)
                goto err_i2c_write;
 
index 6229c3e..e7cc51d 100644 (file)
@@ -700,9 +700,9 @@ static int __devinit pmic8xxx_kp_probe(struct platform_device *pdev)
        return 0;
 
 err_pmic_reg_read:
-       free_irq(kp->key_stuck_irq, NULL);
+       free_irq(kp->key_stuck_irq, kp);
 err_req_stuck_irq:
-       free_irq(kp->key_sense_irq, NULL);
+       free_irq(kp->key_sense_irq, kp);
 err_gpio_config:
 err_get_irq:
        input_free_device(kp->input);
@@ -717,8 +717,8 @@ static int __devexit pmic8xxx_kp_remove(struct platform_device *pdev)
        struct pmic8xxx_kp *kp = platform_get_drvdata(pdev);
 
        device_init_wakeup(&pdev->dev, 0);
-       free_irq(kp->key_stuck_irq, NULL);
-       free_irq(kp->key_sense_irq, NULL);
+       free_irq(kp->key_stuck_irq, kp);
+       free_irq(kp->key_sense_irq, kp);
        input_unregister_device(kp->input);
        kfree(kp);
 
index ca7b891..b21bf5b 100644 (file)
@@ -239,8 +239,6 @@ static int __devexit qt1070_remove(struct i2c_client *client)
        input_unregister_device(data->input);
        kfree(data);
 
-       i2c_set_clientdata(client, NULL);
-
        return 0;
 }
 
index 6876700..934aeb5 100644 (file)
@@ -291,7 +291,7 @@ static int __devexit sh_keysc_remove(struct platform_device *pdev)
        return 0;
 }
 
-#if CONFIG_PM_SLEEP
+#ifdef CONFIG_PM_SLEEP
 static int sh_keysc_suspend(struct device *dev)
 {
        struct platform_device *pdev = to_platform_device(dev);
index 2b3b73e..da3828f 100644 (file)
@@ -657,7 +657,7 @@ static int __devinit tegra_kbc_probe(struct platform_device *pdev)
 
        input_set_drvdata(input_dev, kbc);
 
-       input_dev->evbit[0] = BIT_MASK(EV_KEY);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
        input_set_capability(input_dev, EV_MSC, MSC_SCAN);
 
        input_dev->keycode = kbc->keycode;
index c8f097a..1c58681 100644 (file)
@@ -337,5 +337,5 @@ module_exit(keypad_exit);
 
 MODULE_AUTHOR("Cyril Chemparathy");
 MODULE_DESCRIPTION("TNETV107X Keypad Driver");
-MODULE_ALIAS("platform: tnetv107x-keypad");
+MODULE_ALIAS("platform:tnetv107x-keypad");
 MODULE_LICENSE("GPL");
index d1bf872..c9104bb 100644 (file)
@@ -100,6 +100,27 @@ config INPUT_MAX8925_ONKEY
          To compile this driver as a module, choose M here: the module
          will be called max8925_onkey.
 
+config INPUT_MMA8450
+       tristate "MMA8450 - Freescale's 3-Axis, 8/12-bit Digital Accelerometer"
+       depends on I2C
+       select INPUT_POLLDEV
+       help
+         Say Y here if you want to support Freescale's MMA8450 Accelerometer
+         through I2C interface.
+
+         To compile this driver as a module, choose M here: the
+         module will be called mma8450.
+
+config INPUT_MPU3050
+       tristate "MPU3050 Triaxial gyroscope sensor"
+       depends on I2C
+       help
+         Say Y here if you want to support InvenSense MPU3050
+         connected via an I2C bus.
+
+         To compile this driver as a module, choose M here: the
+         module will be called mpu3050.
+
 config INPUT_APANEL
        tristate "Fujitsu Lifebook Application Panel buttons"
        depends on X86 && I2C && LEDS_CLASS
@@ -209,6 +230,23 @@ config INPUT_KEYSPAN_REMOTE
          To compile this driver as a module, choose M here: the module will
          be called keyspan_remote.
 
+config INPUT_KXTJ9
+       tristate "Kionix KXTJ9 tri-axis digital accelerometer"
+       depends on I2C
+       help
+         Say Y here to enable support for the Kionix KXTJ9 digital tri-axis
+         accelerometer.
+
+         To compile this driver as a module, choose M here: the module will
+         be called kxtj9.
+
+config INPUT_KXTJ9_POLLED_MODE
+       bool "Enable polling mode support"
+       depends on INPUT_KXTJ9
+       select INPUT_POLLDEV
+       help
+         Say Y here if you need accelerometer to work in polling mode.
+
 config INPUT_POWERMATE
        tristate "Griffin PowerMate and Contour Jog support"
        depends on USB_ARCH_HAS_HCD
index 4da7c3a..299ad5e 100644 (file)
@@ -25,8 +25,11 @@ obj-$(CONFIG_INPUT_DM355EVM)         += dm355evm_keys.o
 obj-$(CONFIG_HP_SDC_RTC)               += hp_sdc_rtc.o
 obj-$(CONFIG_INPUT_IXP4XX_BEEPER)      += ixp4xx-beeper.o
 obj-$(CONFIG_INPUT_KEYSPAN_REMOTE)     += keyspan_remote.o
+obj-$(CONFIG_INPUT_KXTJ9)              += kxtj9.o
 obj-$(CONFIG_INPUT_M68K_BEEP)          += m68kspkr.o
 obj-$(CONFIG_INPUT_MAX8925_ONKEY)      += max8925_onkey.o
+obj-$(CONFIG_INPUT_MMA8450)            += mma8450.o
+obj-$(CONFIG_INPUT_MPU3050)            += mpu3050.o
 obj-$(CONFIG_INPUT_PCAP)               += pcap_keys.o
 obj-$(CONFIG_INPUT_PCF50633_PMU)       += pcf50633-input.o
 obj-$(CONFIG_INPUT_PCF8574)            += pcf8574_keypad.o
@@ -46,4 +49,3 @@ obj-$(CONFIG_INPUT_WISTRON_BTNS)      += wistron_btns.o
 obj-$(CONFIG_INPUT_WM831X_ON)          += wm831x-on.o
 obj-$(CONFIG_INPUT_XEN_KBDDEV_FRONTEND)        += xen-kbdfront.o
 obj-$(CONFIG_INPUT_YEALINK)            += yealink.o
-
index 4f72bdd..d00edc9 100644 (file)
@@ -6,7 +6,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
diff --git a/drivers/input/misc/kxtj9.c b/drivers/input/misc/kxtj9.c
new file mode 100644 (file)
index 0000000..c456f63
--- /dev/null
@@ -0,0 +1,671 @@
+/*
+ * Copyright (C) 2011 Kionix, Inc.
+ * Written by Chris Hudson <chudson@kionix.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307, USA
+ */
+
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/input/kxtj9.h>
+#include <linux/input-polldev.h>
+
+#define NAME                   "kxtj9"
+#define G_MAX                  8000
+/* OUTPUT REGISTERS */
+#define XOUT_L                 0x06
+#define WHO_AM_I               0x0F
+/* CONTROL REGISTERS */
+#define INT_REL                        0x1A
+#define CTRL_REG1              0x1B
+#define INT_CTRL1              0x1E
+#define DATA_CTRL              0x21
+/* CONTROL REGISTER 1 BITS */
+#define PC1_OFF                        0x7F
+#define PC1_ON                 (1 << 7)
+/* Data ready funtion enable bit: set during probe if using irq mode */
+#define DRDYE                  (1 << 5)
+/* INTERRUPT CONTROL REGISTER 1 BITS */
+/* Set these during probe if using irq mode */
+#define KXTJ9_IEL              (1 << 3)
+#define KXTJ9_IEA              (1 << 4)
+#define KXTJ9_IEN              (1 << 5)
+/* INPUT_ABS CONSTANTS */
+#define FUZZ                   3
+#define FLAT                   3
+/* RESUME STATE INDICES */
+#define RES_DATA_CTRL          0
+#define RES_CTRL_REG1          1
+#define RES_INT_CTRL1          2
+#define RESUME_ENTRIES         3
+
+/*
+ * The following table lists the maximum appropriate poll interval for each
+ * available output data rate.
+ */
+static const struct {
+       unsigned int cutoff;
+       u8 mask;
+} kxtj9_odr_table[] = {
+       { 3,    ODR800F },
+       { 5,    ODR400F },
+       { 10,   ODR200F },
+       { 20,   ODR100F },
+       { 40,   ODR50F  },
+       { 80,   ODR25F  },
+       { 0,    ODR12_5F},
+};
+
+struct kxtj9_data {
+       struct i2c_client *client;
+       struct kxtj9_platform_data pdata;
+       struct input_dev *input_dev;
+#ifdef CONFIG_INPUT_KXTJ9_POLLED_MODE
+       struct input_polled_dev *poll_dev;
+#endif
+       unsigned int last_poll_interval;
+       u8 shift;
+       u8 ctrl_reg1;
+       u8 data_ctrl;
+       u8 int_ctrl;
+};
+
+static int kxtj9_i2c_read(struct kxtj9_data *tj9, u8 addr, u8 *data, int len)
+{
+       struct i2c_msg msgs[] = {
+               {
+                       .addr = tj9->client->addr,
+                       .flags = tj9->client->flags,
+                       .len = 1,
+                       .buf = &addr,
+               },
+               {
+                       .addr = tj9->client->addr,
+                       .flags = tj9->client->flags | I2C_M_RD,
+                       .len = len,
+                       .buf = data,
+               },
+       };
+
+       return i2c_transfer(tj9->client->adapter, msgs, 2);
+}
+
+static void kxtj9_report_acceleration_data(struct kxtj9_data *tj9)
+{
+       s16 acc_data[3]; /* Data bytes from hardware xL, xH, yL, yH, zL, zH */
+       s16 x, y, z;
+       int err;
+
+       err = kxtj9_i2c_read(tj9, XOUT_L, (u8 *)acc_data, 6);
+       if (err < 0)
+               dev_err(&tj9->client->dev, "accelerometer data read failed\n");
+
+       x = le16_to_cpu(acc_data[tj9->pdata.axis_map_x]) >> tj9->shift;
+       y = le16_to_cpu(acc_data[tj9->pdata.axis_map_y]) >> tj9->shift;
+       z = le16_to_cpu(acc_data[tj9->pdata.axis_map_z]) >> tj9->shift;
+
+       input_report_abs(tj9->input_dev, ABS_X, tj9->pdata.negate_x ? -x : x);
+       input_report_abs(tj9->input_dev, ABS_Y, tj9->pdata.negate_y ? -y : y);
+       input_report_abs(tj9->input_dev, ABS_Z, tj9->pdata.negate_z ? -z : z);
+       input_sync(tj9->input_dev);
+}
+
+static irqreturn_t kxtj9_isr(int irq, void *dev)
+{
+       struct kxtj9_data *tj9 = dev;
+       int err;
+
+       /* data ready is the only possible interrupt type */
+       kxtj9_report_acceleration_data(tj9);
+
+       err = i2c_smbus_read_byte_data(tj9->client, INT_REL);
+       if (err < 0)
+               dev_err(&tj9->client->dev,
+                       "error clearing interrupt status: %d\n", err);
+
+       return IRQ_HANDLED;
+}
+
+static int kxtj9_update_g_range(struct kxtj9_data *tj9, u8 new_g_range)
+{
+       switch (new_g_range) {
+       case KXTJ9_G_2G:
+               tj9->shift = 4;
+               break;
+       case KXTJ9_G_4G:
+               tj9->shift = 3;
+               break;
+       case KXTJ9_G_8G:
+               tj9->shift = 2;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       tj9->ctrl_reg1 &= 0xe7;
+       tj9->ctrl_reg1 |= new_g_range;
+
+       return 0;
+}
+
+static int kxtj9_update_odr(struct kxtj9_data *tj9, unsigned int poll_interval)
+{
+       int err;
+       int i;
+
+       /* Use the lowest ODR that can support the requested poll interval */
+       for (i = 0; i < ARRAY_SIZE(kxtj9_odr_table); i++) {
+               tj9->data_ctrl = kxtj9_odr_table[i].mask;
+               if (poll_interval < kxtj9_odr_table[i].cutoff)
+                       break;
+       }
+
+       err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, 0);
+       if (err < 0)
+               return err;
+
+       err = i2c_smbus_write_byte_data(tj9->client, DATA_CTRL, tj9->data_ctrl);
+       if (err < 0)
+               return err;
+
+       err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, tj9->ctrl_reg1);
+       if (err < 0)
+               return err;
+
+       return 0;
+}
+
+static int kxtj9_device_power_on(struct kxtj9_data *tj9)
+{
+       if (tj9->pdata.power_on)
+               return tj9->pdata.power_on();
+
+       return 0;
+}
+
+static void kxtj9_device_power_off(struct kxtj9_data *tj9)
+{
+       int err;
+
+       tj9->ctrl_reg1 &= PC1_OFF;
+       err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, tj9->ctrl_reg1);
+       if (err < 0)
+               dev_err(&tj9->client->dev, "soft power off failed\n");
+
+       if (tj9->pdata.power_off)
+               tj9->pdata.power_off();
+}
+
+static int kxtj9_enable(struct kxtj9_data *tj9)
+{
+       int err;
+
+       err = kxtj9_device_power_on(tj9);
+       if (err < 0)
+               return err;
+
+       /* ensure that PC1 is cleared before updating control registers */
+       err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, 0);
+       if (err < 0)
+               return err;
+
+       /* only write INT_CTRL_REG1 if in irq mode */
+       if (tj9->client->irq) {
+               err = i2c_smbus_write_byte_data(tj9->client,
+                                               INT_CTRL1, tj9->int_ctrl);
+               if (err < 0)
+                       return err;
+       }
+
+       err = kxtj9_update_g_range(tj9, tj9->pdata.g_range);
+       if (err < 0)
+               return err;
+
+       /* turn on outputs */
+       tj9->ctrl_reg1 |= PC1_ON;
+       err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, tj9->ctrl_reg1);
+       if (err < 0)
+               return err;
+
+       err = kxtj9_update_odr(tj9, tj9->last_poll_interval);
+       if (err < 0)
+               return err;
+
+       /* clear initial interrupt if in irq mode */
+       if (tj9->client->irq) {
+               err = i2c_smbus_read_byte_data(tj9->client, INT_REL);
+               if (err < 0) {
+                       dev_err(&tj9->client->dev,
+                               "error clearing interrupt: %d\n", err);
+                       goto fail;
+               }
+       }
+
+       return 0;
+
+fail:
+       kxtj9_device_power_off(tj9);
+       return err;
+}
+
+static void kxtj9_disable(struct kxtj9_data *tj9)
+{
+       kxtj9_device_power_off(tj9);
+}
+
+static int kxtj9_input_open(struct input_dev *input)
+{
+       struct kxtj9_data *tj9 = input_get_drvdata(input);
+
+       return kxtj9_enable(tj9);
+}
+
+static void kxtj9_input_close(struct input_dev *dev)
+{
+       struct kxtj9_data *tj9 = input_get_drvdata(dev);
+
+       kxtj9_disable(tj9);
+}
+
+/*
+ * Shared input-device initialization for the irq-driven and polled
+ * paths: declare the three absolute axes and fill in identity fields.
+ */
+static void __devinit kxtj9_init_input_device(struct kxtj9_data *tj9,
+                                             struct input_dev *input_dev)
+{
+       __set_bit(EV_ABS, input_dev->evbit);
+       input_set_abs_params(input_dev, ABS_X, -G_MAX, G_MAX, FUZZ, FLAT);
+       input_set_abs_params(input_dev, ABS_Y, -G_MAX, G_MAX, FUZZ, FLAT);
+       input_set_abs_params(input_dev, ABS_Z, -G_MAX, G_MAX, FUZZ, FLAT);
+
+       input_dev->name = "kxtj9_accel";
+       input_dev->id.bustype = BUS_I2C;
+       input_dev->dev.parent = &tj9->client->dev;
+}
+
+/*
+ * Allocate and register the input device used in irq mode.  Returns 0
+ * on success or a negative errno; the device is freed here on failure.
+ */
+static int __devinit kxtj9_setup_input_device(struct kxtj9_data *tj9)
+{
+       struct input_dev *input_dev;
+       int err;
+
+       input_dev = input_allocate_device();
+       if (!input_dev) {
+               dev_err(&tj9->client->dev, "input device allocate failed\n");
+               return -ENOMEM;
+       }
+
+       tj9->input_dev = input_dev;
+
+       input_dev->open = kxtj9_input_open;
+       input_dev->close = kxtj9_input_close;
+       input_set_drvdata(input_dev, tj9);
+
+       kxtj9_init_input_device(tj9, input_dev);
+
+       err = input_register_device(tj9->input_dev);
+       if (err) {
+               /*
+                * This is the irq-driven (non-polled) path; the original
+                * message wrongly said "input polled device".
+                */
+               dev_err(&tj9->client->dev,
+                       "unable to register input device %s: %d\n",
+                       tj9->input_dev->name, err);
+               input_free_device(tj9->input_dev);
+               return err;
+       }
+
+       return 0;
+}
+
+/*
+ * When IRQ mode is selected, we need to provide an interface to allow the user
+ * to change the output data rate of the part.  For consistency, we are using
+ * the set_poll method, which accepts a poll interval in milliseconds, and then
+ * calls update_odr() while passing this value as an argument.  In IRQ mode, the
+ * data outputs will not be read AT the requested poll interval, rather, the
+ * lowest ODR that can support the requested interval.  The client application
+ * will be responsible for retrieving data from the input node at the desired
+ * interval.
+ */
+
+/* Returns currently selected poll interval (in ms) */
+static ssize_t kxtj9_get_poll(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       struct i2c_client *client = to_i2c_client(dev);
+       struct kxtj9_data *tj9 = i2c_get_clientdata(client);
+
+       return sprintf(buf, "%d\n", tj9->last_poll_interval);
+}
+
+/* Allow users to select a new poll interval (in ms) */
+static ssize_t kxtj9_set_poll(struct device *dev, struct device_attribute *attr,
+                                               const char *buf, size_t count)
+{
+       struct i2c_client *client = to_i2c_client(dev);
+       struct kxtj9_data *tj9 = i2c_get_clientdata(client);
+       struct input_dev *input_dev = tj9->input_dev;
+       unsigned int interval;
+       int error;
+
+       error = kstrtouint(buf, 10, &interval);
+       if (error < 0)
+               return error;
+
+       /* Lock the device to prevent races with open/close (and itself) */
+       mutex_lock(&input_dev->mutex);
+
+       disable_irq(client->irq);
+
+       /*
+        * Set current interval to the greater of the minimum interval or
+        * the requested interval
+        */
+       tj9->last_poll_interval = max(interval, tj9->pdata.min_interval);
+
+       kxtj9_update_odr(tj9, tj9->last_poll_interval);
+
+       enable_irq(client->irq);
+       mutex_unlock(&input_dev->mutex);
+
+       return count;
+}
+
+static DEVICE_ATTR(poll, S_IRUGO|S_IWUSR, kxtj9_get_poll, kxtj9_set_poll);
+
+static struct attribute *kxtj9_attributes[] = {
+       &dev_attr_poll.attr,
+       NULL
+};
+
+static struct attribute_group kxtj9_attribute_group = {
+       .attrs = kxtj9_attributes
+};
+
+
+#ifdef CONFIG_INPUT_KXTJ9_POLLED_MODE
+/*
+ * Polled-mode sampling: report a reading and, if the user changed the
+ * poll interval, reprogram the output data rate to match.
+ */
+static void kxtj9_poll(struct input_polled_dev *dev)
+{
+       struct kxtj9_data *tj9 = dev->private;
+       unsigned int poll_interval = dev->poll_interval;
+
+       kxtj9_report_acceleration_data(tj9);
+
+       if (poll_interval != tj9->last_poll_interval) {
+               kxtj9_update_odr(tj9, poll_interval);
+               tj9->last_poll_interval = poll_interval;
+       }
+}
+
+/* Polled-device open: power the part up. */
+static void kxtj9_polled_input_open(struct input_polled_dev *dev)
+{
+       struct kxtj9_data *tj9 = dev->private;
+
+       kxtj9_enable(tj9);
+}
+
+/* Polled-device close: power the part down. */
+static void kxtj9_polled_input_close(struct input_polled_dev *dev)
+{
+       struct kxtj9_data *tj9 = dev->private;
+
+       kxtj9_disable(tj9);
+}
+
+/*
+ * Allocate, initialize and register the polled input device used when
+ * no irq is available.  Returns 0 on success or a negative errno; the
+ * polled device is freed here on registration failure.
+ */
+static int __devinit kxtj9_setup_polled_device(struct kxtj9_data *tj9)
+{
+       int err;
+       struct input_polled_dev *poll_dev;
+       poll_dev = input_allocate_polled_device();
+
+       if (!poll_dev) {
+               dev_err(&tj9->client->dev,
+                       "Failed to allocate polled device\n");
+               return -ENOMEM;
+       }
+
+       tj9->poll_dev = poll_dev;
+       tj9->input_dev = poll_dev->input;
+
+       poll_dev->private = tj9;
+       poll_dev->poll = kxtj9_poll;
+       poll_dev->open = kxtj9_polled_input_open;
+       poll_dev->close = kxtj9_polled_input_close;
+
+       kxtj9_init_input_device(tj9, poll_dev->input);
+
+       err = input_register_polled_device(poll_dev);
+       if (err) {
+               dev_err(&tj9->client->dev,
+                       "Unable to register polled device, err=%d\n", err);
+               input_free_polled_device(poll_dev);
+               return err;
+       }
+
+       return 0;
+}
+
+/* Inverse of kxtj9_setup_polled_device(). */
+static void __devexit kxtj9_teardown_polled_device(struct kxtj9_data *tj9)
+{
+       input_unregister_polled_device(tj9->poll_dev);
+       input_free_polled_device(tj9->poll_dev);
+}
+
+#else
+
+/* Stub: polled mode not compiled in; probe fails for irq-less setups. */
+static inline int kxtj9_setup_polled_device(struct kxtj9_data *tj9)
+{
+       return -ENOSYS;
+}
+
+static inline void kxtj9_teardown_polled_device(struct kxtj9_data *tj9)
+{
+}
+
+#endif
+
+/*
+ * Probe-time identity check: power the part up, read WHO_AM_I and
+ * compare against the expected value, then power back down.  Returns 0
+ * when the chip answers correctly, -EIO on an unexpected ID, or a
+ * negative errno from the bus transfer.
+ */
+static int __devinit kxtj9_verify(struct kxtj9_data *tj9)
+{
+       int retval;
+
+       retval = kxtj9_device_power_on(tj9);
+       if (retval < 0)
+               return retval;
+
+       retval = i2c_smbus_read_byte_data(tj9->client, WHO_AM_I);
+       if (retval < 0) {
+               dev_err(&tj9->client->dev, "read err int source\n");
+               goto out;
+       }
+
+       /* 0x06 is the WHO_AM_I value this driver accepts for the KXTJ9 */
+       retval = retval != 0x06 ? -EIO : 0;
+
+out:
+       kxtj9_device_power_off(tj9);
+       return retval;
+}
+
+/*
+ * Bind to a KXTJ9.  Requires platform data; verifies the chip identity,
+ * then sets up either an irq-driven input device (plus a sysfs "poll"
+ * attribute to select the ODR) or, when no irq is available, a polled
+ * input device.  Returns 0 on success or a negative errno, unwinding
+ * all acquired resources on failure.
+ */
+static int __devinit kxtj9_probe(struct i2c_client *client,
+                                const struct i2c_device_id *id)
+{
+       const struct kxtj9_platform_data *pdata = client->dev.platform_data;
+       struct kxtj9_data *tj9;
+       int err;
+
+       if (!i2c_check_functionality(client->adapter,
+                               I2C_FUNC_I2C | I2C_FUNC_SMBUS_BYTE_DATA)) {
+               dev_err(&client->dev, "client is not i2c capable\n");
+               return -ENXIO;
+       }
+
+       if (!pdata) {
+               dev_err(&client->dev, "platform data is NULL; exiting\n");
+               return -EINVAL;
+       }
+
+       tj9 = kzalloc(sizeof(*tj9), GFP_KERNEL);
+       if (!tj9) {
+               dev_err(&client->dev,
+                       "failed to allocate memory for module data\n");
+               return -ENOMEM;
+       }
+
+       tj9->client = client;
+       tj9->pdata = *pdata;    /* copy by value; no later reads of *pdata */
+
+       if (pdata->init) {
+               err = pdata->init();
+               if (err < 0)
+                       goto err_free_mem;
+       }
+
+       err = kxtj9_verify(tj9);
+       if (err < 0) {
+               dev_err(&client->dev, "device not recognized\n");
+               goto err_pdata_exit;
+       }
+
+       i2c_set_clientdata(client, tj9);
+
+       /* initial register images applied on the next kxtj9_enable() */
+       tj9->ctrl_reg1 = tj9->pdata.res_12bit | tj9->pdata.g_range;
+       tj9->data_ctrl = tj9->pdata.data_odr_init;
+
+       if (client->irq) {
+               /* If in irq mode, populate INT_CTRL_REG1 and enable DRDY. */
+               tj9->int_ctrl |= KXTJ9_IEN | KXTJ9_IEA | KXTJ9_IEL;
+               tj9->ctrl_reg1 |= DRDYE;
+
+               err = kxtj9_setup_input_device(tj9);
+               if (err)
+                       goto err_pdata_exit;
+
+               err = request_threaded_irq(client->irq, NULL, kxtj9_isr,
+                                          IRQF_TRIGGER_RISING | IRQF_ONESHOT,
+                                          "kxtj9-irq", tj9);
+               if (err) {
+                       dev_err(&client->dev, "request irq failed: %d\n", err);
+                       goto err_destroy_input;
+               }
+
+               err = sysfs_create_group(&client->dev.kobj, &kxtj9_attribute_group);
+               if (err) {
+                       dev_err(&client->dev, "sysfs create failed: %d\n", err);
+                       goto err_free_irq;
+               }
+
+       } else {
+               err = kxtj9_setup_polled_device(tj9);
+               if (err)
+                       goto err_pdata_exit;
+       }
+
+       return 0;
+
+err_free_irq:
+       free_irq(client->irq, tj9);
+err_destroy_input:
+       input_unregister_device(tj9->input_dev);
+err_pdata_exit:
+       if (tj9->pdata.exit)
+               tj9->pdata.exit();
+err_free_mem:
+       kfree(tj9);
+       return err;
+}
+
+/*
+ * Unbind: tear down whichever input path probe created (irq-driven or
+ * polled), run the platform exit hook and free the driver state.
+ */
+static int __devexit kxtj9_remove(struct i2c_client *client)
+{
+       struct kxtj9_data *tj9 = i2c_get_clientdata(client);
+
+       if (client->irq) {
+               sysfs_remove_group(&client->dev.kobj, &kxtj9_attribute_group);
+               free_irq(client->irq, tj9);
+               input_unregister_device(tj9->input_dev);
+       } else {
+               kxtj9_teardown_polled_device(tj9);
+       }
+
+       if (tj9->pdata.exit)
+               tj9->pdata.exit();
+
+       kfree(tj9);
+
+       return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+/*
+ * System suspend: power the part down, but only when userspace holds
+ * the input device open.  input_dev->mutex serializes against
+ * open/close and the sysfs poll handler.
+ */
+static int kxtj9_suspend(struct device *dev)
+{
+       struct i2c_client *client = to_i2c_client(dev);
+       struct kxtj9_data *tj9 = i2c_get_clientdata(client);
+       struct input_dev *input_dev = tj9->input_dev;
+
+       mutex_lock(&input_dev->mutex);
+
+       if (input_dev->users)
+               kxtj9_disable(tj9);
+
+       mutex_unlock(&input_dev->mutex);
+       return 0;
+}
+
+/*
+ * System resume: power the part back up if it was in use.
+ * NOTE(review): a kxtj9_enable() failure is ignored; retval stays 0.
+ */
+static int kxtj9_resume(struct device *dev)
+{
+       struct i2c_client *client = to_i2c_client(dev);
+       struct kxtj9_data *tj9 = i2c_get_clientdata(client);
+       struct input_dev *input_dev = tj9->input_dev;
+       int retval = 0;
+
+       mutex_lock(&input_dev->mutex);
+
+       if (input_dev->users)
+               kxtj9_enable(tj9);
+
+       mutex_unlock(&input_dev->mutex);
+       return retval;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(kxtj9_pm_ops, kxtj9_suspend, kxtj9_resume);
+
+static const struct i2c_device_id kxtj9_id[] = {
+       { NAME, 0 },
+       { },
+};
+
+MODULE_DEVICE_TABLE(i2c, kxtj9_id);
+
+static struct i2c_driver kxtj9_driver = {
+       .driver = {
+               .name   = NAME,
+               .owner  = THIS_MODULE,
+               .pm     = &kxtj9_pm_ops,
+       },
+       .probe          = kxtj9_probe,
+       .remove         = __devexit_p(kxtj9_remove),
+       .id_table       = kxtj9_id,
+};
+
+static int __init kxtj9_init(void)
+{
+       return i2c_add_driver(&kxtj9_driver);
+}
+module_init(kxtj9_init);
+
+static void __exit kxtj9_exit(void)
+{
+       i2c_del_driver(&kxtj9_driver);
+}
+module_exit(kxtj9_exit);
+
+MODULE_DESCRIPTION("KXTJ9 accelerometer driver");
+MODULE_AUTHOR("Chris Hudson <chudson@kionix.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/input/misc/mma8450.c b/drivers/input/misc/mma8450.c
new file mode 100644 (file)
index 0000000..20f8f92
--- /dev/null
@@ -0,0 +1,256 @@
+/*
+ *  Driver for Freescale's 3-Axis Accelerometer MMA8450
+ *
+ *  Copyright (C) 2011 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/input-polldev.h>
+
+#define MMA8450_DRV_NAME       "mma8450"
+
+#define MODE_CHANGE_DELAY_MS   100
+#define POLL_INTERVAL          100
+#define POLL_INTERVAL_MAX      500
+
+/* register definitions */
+#define MMA8450_STATUS         0x00
+#define MMA8450_STATUS_ZXYDR   0x08
+
+#define MMA8450_OUT_X8         0x01
+#define MMA8450_OUT_Y8         0x02
+#define MMA8450_OUT_Z8         0x03
+
+#define MMA8450_OUT_X_LSB      0x05
+#define MMA8450_OUT_X_MSB      0x06
+#define MMA8450_OUT_Y_LSB      0x07
+#define MMA8450_OUT_Y_MSB      0x08
+#define MMA8450_OUT_Z_LSB      0x09
+#define MMA8450_OUT_Z_MSB      0x0a
+
+#define MMA8450_XYZ_DATA_CFG   0x16
+
+#define MMA8450_CTRL_REG1      0x38
+#define MMA8450_CTRL_REG2      0x39
+
+/* mma8450 status */
+struct mma8450 {
+       struct i2c_client       *client;
+       struct input_polled_dev *idev;
+};
+
+/*
+ * Read one register.  Returns the (non-negative) register value or a
+ * negative errno; failures are logged here so callers need not repeat
+ * the message.
+ */
+static int mma8450_read(struct mma8450 *m, unsigned off)
+{
+       struct i2c_client *c = m->client;
+       int ret;
+
+       ret = i2c_smbus_read_byte_data(c, off);
+       if (ret < 0)
+               dev_err(&c->dev,
+                       "failed to read register 0x%02x, error %d\n",
+                       off, ret);
+
+       return ret;
+}
+
+/* Write one register; returns 0 on success or a negative errno (logged). */
+static int mma8450_write(struct mma8450 *m, unsigned off, u8 v)
+{
+       struct i2c_client *c = m->client;
+       int error;
+
+       error = i2c_smbus_write_byte_data(c, off, v);
+       if (error < 0) {
+               dev_err(&c->dev,
+                       "failed to write to register 0x%02x, error %d\n",
+                       off, error);
+               return error;
+       }
+
+       return 0;
+}
+
+/*
+ * Read all three axes in one block transfer.  Each 12-bit sample is
+ * split between an 8-bit MSB register and the low nibble of an LSB
+ * register; the two halves are recombined here.  Returns 0 on success
+ * or a negative errno (logged).
+ *
+ * NOTE(review): the assembled values are not sign-extended although the
+ * axes are declared -2048..2047 in probe - confirm the sample encoding
+ * against the MMA8450 datasheet.
+ */
+static int mma8450_read_xyz(struct mma8450 *m, int *x, int *y, int *z)
+{
+       struct i2c_client *c = m->client;
+       u8 buff[6];
+       int err;
+
+       err = i2c_smbus_read_i2c_block_data(c, MMA8450_OUT_X_LSB, 6, buff);
+       if (err < 0) {
+               dev_err(&c->dev,
+                       "failed to read block data at 0x%02x, error %d\n",
+                       MMA8450_OUT_X_LSB, err);
+               return err;
+       }
+
+       *x = ((buff[1] << 4) & 0xff0) | (buff[0] & 0xf);
+       *y = ((buff[3] << 4) & 0xff0) | (buff[2] & 0xf);
+       *z = ((buff[5] << 4) & 0xff0) | (buff[4] & 0xf);
+
+       return 0;
+}
+
+/*
+ * Polled-mode sampling: skip the cycle unless the chip flags fresh
+ * X/Y/Z data in the status register, otherwise read the axes and
+ * forward them as ABS events.
+ */
+static void mma8450_poll(struct input_polled_dev *dev)
+{
+       struct mma8450 *m = dev->private;
+       int x, y, z;
+       int ret;
+       int err;
+
+       ret = mma8450_read(m, MMA8450_STATUS);
+       if (ret < 0)
+               return;
+
+       if (!(ret & MMA8450_STATUS_ZXYDR))
+               return;
+
+       err = mma8450_read_xyz(m, &x, &y, &z);
+       if (err)
+               return;
+
+       input_report_abs(dev->input, ABS_X, x);
+       input_report_abs(dev->input, ABS_Y, y);
+       input_report_abs(dev->input, ABS_Z, z);
+       input_sync(dev->input);
+}
+
+/*
+ * Initialize the MMA8450 chip when the polled device is opened: enable
+ * X/Y/Z events and put the part in its active mode, then wait for the
+ * mode change to take effect.  Errors abort silently (the polled input
+ * open callback returns void).
+ */
+static void mma8450_open(struct input_polled_dev *dev)
+{
+       struct mma8450 *m = dev->private;
+       int err;
+
+       /* enable all events from X/Y/Z, no FIFO */
+       err = mma8450_write(m, MMA8450_XYZ_DATA_CFG, 0x07);
+       if (err)
+               return;
+
+       /*
+        * Sleep mode poll rate - 50Hz
+        * System output data rate - 400Hz
+        * Full scale selection - Active, +/- 2G
+        */
+       err = mma8450_write(m, MMA8450_CTRL_REG1, 0x01);
+       if (err)        /* was "err < 0"; mma8450_write() returns 0 or <0 */
+               return;
+
+       msleep(MODE_CHANGE_DELAY_MS);
+}
+
+/* Put the chip back to standby/reset state when the device is closed. */
+static void mma8450_close(struct input_polled_dev *dev)
+{
+       struct mma8450 *m = dev->private;
+
+       mma8450_write(m, MMA8450_CTRL_REG1, 0x00);
+       mma8450_write(m, MMA8450_CTRL_REG2, 0x01);
+}
+
+/*
+ * I2C init/probing/exit functions
+ */
+/*
+ * Bind to an MMA8450: allocate driver state and register a polled input
+ * device reporting the three 12-bit acceleration axes.  Returns 0 on
+ * success or a negative errno; all allocations are released on failure.
+ */
+static int __devinit mma8450_probe(struct i2c_client *c,
+                                  const struct i2c_device_id *id)
+{
+       struct input_polled_dev *idev;
+       struct mma8450 *m;
+       int err;
+
+       m = kzalloc(sizeof(struct mma8450), GFP_KERNEL);
+       idev = input_allocate_polled_device();
+       if (!m || !idev) {
+               err = -ENOMEM;
+               goto err_free_mem;
+       }
+
+       m->client = c;
+       m->idev = idev;
+
+       /*
+        * mma8450_remove() looks the driver state up with
+        * i2c_get_clientdata(); without this call it would dereference a
+        * NULL pointer on driver unbind.
+        */
+       i2c_set_clientdata(c, m);
+
+       idev->private           = m;
+       idev->input->name       = MMA8450_DRV_NAME;
+       idev->input->id.bustype = BUS_I2C;
+       idev->poll              = mma8450_poll;
+       idev->poll_interval     = POLL_INTERVAL;
+       idev->poll_interval_max = POLL_INTERVAL_MAX;
+       idev->open              = mma8450_open;
+       idev->close             = mma8450_close;
+
+       __set_bit(EV_ABS, idev->input->evbit);
+       input_set_abs_params(idev->input, ABS_X, -2048, 2047, 32, 32);
+       input_set_abs_params(idev->input, ABS_Y, -2048, 2047, 32, 32);
+       input_set_abs_params(idev->input, ABS_Z, -2048, 2047, 32, 32);
+
+       err = input_register_polled_device(idev);
+       if (err) {
+               dev_err(&c->dev, "failed to register polled input device\n");
+               goto err_free_mem;
+       }
+
+       return 0;
+
+err_free_mem:
+       /* both helpers tolerate NULL, so partial allocation is safe here */
+       input_free_polled_device(idev);
+       kfree(m);
+       return err;
+}
+
+/* Unbind: unregister and free the polled device, then the driver state. */
+static int __devexit mma8450_remove(struct i2c_client *c)
+{
+       struct mma8450 *m = i2c_get_clientdata(c);
+       struct input_polled_dev *idev = m->idev;
+
+       input_unregister_polled_device(idev);
+       input_free_polled_device(idev);
+       kfree(m);
+
+       return 0;
+}
+
+static const struct i2c_device_id mma8450_id[] = {
+       { MMA8450_DRV_NAME, 0 },
+       { },
+};
+MODULE_DEVICE_TABLE(i2c, mma8450_id);
+
+/* Plain i2c driver; no PM ops - the chip is idled via open/close. */
+static struct i2c_driver mma8450_driver = {
+       .driver = {
+               .name   = MMA8450_DRV_NAME,
+               .owner  = THIS_MODULE,
+       },
+       .probe          = mma8450_probe,
+       .remove         = __devexit_p(mma8450_remove),
+       .id_table       = mma8450_id,
+};
+
+static int __init mma8450_init(void)
+{
+       return i2c_add_driver(&mma8450_driver);
+}
+module_init(mma8450_init);
+
+static void __exit mma8450_exit(void)
+{
+       i2c_del_driver(&mma8450_driver);
+}
+module_exit(mma8450_exit);
+
+MODULE_AUTHOR("Freescale Semiconductor, Inc.");
+MODULE_DESCRIPTION("MMA8450 3-Axis Accelerometer Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/input/misc/mpu3050.c b/drivers/input/misc/mpu3050.c
new file mode 100644 (file)
index 0000000..b95fac1
--- /dev/null
@@ -0,0 +1,376 @@
+/*
+ * MPU3050 Tri-axis gyroscope driver
+ *
+ * Copyright (C) 2011 Wistron Co.Ltd
+ * Joseph Lai <joseph_lai@wistron.com>
+ *
+ * Trimmed down by Alan Cox <alan@linux.intel.com> to produce this version
+ *
+ * This is a 'lite' version of the driver, while we consider the right way
+ * to present the other features to user space. In particular it requires the
+ * device has an IRQ, and it only provides an input interface, so is not much
+ * use for device orientation. A fuller version is available from the Meego
+ * tree.
+ *
+ * This program is based on bma023.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/mutex.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/pm_runtime.h>
+
+#define MPU3050_CHIP_ID_REG    0x00
+#define MPU3050_CHIP_ID                0x69
+#define MPU3050_XOUT_H         0x1D
+#define MPU3050_PWR_MGM                0x3E
+#define MPU3050_PWR_MGM_POS    6
+#define MPU3050_PWR_MGM_MASK   0x40
+
+#define MPU3050_AUTO_DELAY     1000
+
+#define MPU3050_MIN_VALUE      -32768
+#define MPU3050_MAX_VALUE      32767
+
+/* One raw 16-bit gyro sample per axis, as read from the chip. */
+struct axis_data {
+       s16 x;
+       s16 y;
+       s16 z;
+};
+
+/* Per-device driver state. */
+struct mpu3050_sensor {
+       struct i2c_client *client;
+       struct device *dev;     /* shortcut to &client->dev */
+       struct input_dev *idev;
+};
+
+/**
+ *     mpu3050_xyz_read_reg    -       read the axes values
+ *     @client: i2c client of the sensor
+ *     @buffer: provide register addr and get register
+ *     @length: length of register
+ *
+ *     Reads @length bytes starting at MPU3050_XOUT_H in one combined
+ *     write/read transaction.  Returns the i2c_transfer() result: the
+ *     number of messages transferred (2) on success, or a negative
+ *     error code on failure.
+ */
+static int mpu3050_xyz_read_reg(struct i2c_client *client,
+                              u8 *buffer, int length)
+{
+       /*
+        * Annoying we can't make this const because the i2c layer doesn't
+        * declare input buffers const.
+        */
+       char cmd = MPU3050_XOUT_H;
+       struct i2c_msg msg[] = {
+               {
+                       .addr = client->addr,
+                       .flags = 0,
+                       .len = 1,
+                       .buf = &cmd,
+               },
+               {
+                       .addr = client->addr,
+                       .flags = I2C_M_RD,
+                       .len = length,
+                       .buf = buffer,
+               },
+       };
+
+       return i2c_transfer(client->adapter, msg, 2);
+}
+
+/**
+ *     mpu3050_read_xyz        -       get co-ordinates from device
+ *     @client: i2c address of sensor
+ *     @coords: co-ordinates to update
+ *
+ *     Return the converted X Y and Z co-ordinates from the sensor device.
+ *     The device supplies the samples big-endian; they are converted to
+ *     host order here.
+ *
+ *     NOTE(review): the return value of mpu3050_xyz_read_reg() is not
+ *     checked, so a failed transfer silently reports stale or
+ *     uninitialized data.
+ */
+static void mpu3050_read_xyz(struct i2c_client *client,
+                            struct axis_data *coords)
+{
+       u16 buffer[3];
+
+       mpu3050_xyz_read_reg(client, (u8 *)buffer, 6);
+       coords->x = be16_to_cpu(buffer[0]);
+       coords->y = be16_to_cpu(buffer[1]);
+       coords->z = be16_to_cpu(buffer[2]);
+       dev_dbg(&client->dev, "%s: x %d, y %d, z %d\n", __func__,
+                                       coords->x, coords->y, coords->z);
+}
+
+/**
+ *     mpu3050_set_power_mode  -       set the power mode
+ *     @client: i2c client for the sensor
+ *     @val: value to switch on/off of power, 1: normal power, 0: low power
+ *
+ *     Put device to normal-power mode or low-power mode.  The hardware
+ *     bit has the opposite sense of @val: the XOR with the mask inverts
+ *     the requested value, so @val = 1 clears the bit.
+ */
+static void mpu3050_set_power_mode(struct i2c_client *client, u8 val)
+{
+       u8 value;
+
+       /*
+        * Read-modify-write so only the power-management bit changes.
+        * NOTE(review): a failed read returns a negative errno that is
+        * truncated into u8 here - confirm this is acceptable.
+        */
+       value = i2c_smbus_read_byte_data(client, MPU3050_PWR_MGM);
+       value = (value & ~MPU3050_PWR_MGM_MASK) |
+               (((val << MPU3050_PWR_MGM_POS) & MPU3050_PWR_MGM_MASK) ^
+                MPU3050_PWR_MGM_MASK);
+       i2c_smbus_write_byte_data(client, MPU3050_PWR_MGM, value);
+}
+
+/**
+ *     mpu3050_input_open      -       called on input event open
+ *     @input: input dev of opened device
+ *
+ *     The input layer calls this function when input event is opened. The
+ *     function will push the device to resume. Then, the device is ready
+ *     to provide data.
+ *
+ *     NOTE(review): pm_runtime_get() is asynchronous, so the resume may
+ *     still be in flight when this returns.
+ */
+static int mpu3050_input_open(struct input_dev *input)
+{
+       struct mpu3050_sensor *sensor = input_get_drvdata(input);
+
+       pm_runtime_get(sensor->dev);
+
+       return 0;
+}
+
+/**
+ *     mpu3050_input_close     -       called on input event close
+ *     @input: input dev of closed device
+ *
+ *     The input layer calls this function when input event is closed. The
+ *     function will push the device to suspend (subject to the runtime PM
+ *     autosuspend delay set in probe).
+ */
+static void mpu3050_input_close(struct input_dev *input)
+{
+       struct mpu3050_sensor *sensor = input_get_drvdata(input);
+
+       pm_runtime_put(sensor->dev);
+}
+
+/**
+ *     mpu3050_interrupt_thread        -       handle an IRQ
+ *     @irq: interrupt number
+ *     @data: the sensor
+ *
+ *     Called by the kernel single threaded after an interrupt occurs. Read
+ *     the sensor data and generate an input event for it.
+ */
+static irqreturn_t mpu3050_interrupt_thread(int irq, void *data)
+{
+       struct mpu3050_sensor *sensor = data;
+       struct axis_data axis;
+
+       mpu3050_read_xyz(sensor->client, &axis);
+
+       input_report_abs(sensor->idev, ABS_X, axis.x);
+       input_report_abs(sensor->idev, ABS_Y, axis.y);
+       input_report_abs(sensor->idev, ABS_Z, axis.z);
+       input_sync(sensor->idev);
+
+       return IRQ_HANDLED;
+}
+
+/**
+ *     mpu3050_probe   -       device detection callback
+ *     @client: i2c client of found device
+ *     @id: id match information
+ *
+ *     The I2C layer calls us when it believes a sensor is present at this
+ *     address. Probe to see if this is correct and to validate the device.
+ *
+ *     If present install the relevant sysfs interfaces and input device.
+ */
+static int __devinit mpu3050_probe(struct i2c_client *client,
+                                  const struct i2c_device_id *id)
+{
+       struct mpu3050_sensor *sensor;
+       struct input_dev *idev;
+       int ret;
+       int error;
+
+       sensor = kzalloc(sizeof(struct mpu3050_sensor), GFP_KERNEL);
+       idev = input_allocate_device();
+       if (!sensor || !idev) {
+               dev_err(&client->dev, "failed to allocate driver data\n");
+               error = -ENOMEM;
+               goto err_free_mem;
+       }
+
+       sensor->client = client;
+       sensor->dev = &client->dev;
+       sensor->idev = idev;
+
+       /*
+        * mpu3050_remove() retrieves the driver state with
+        * i2c_get_clientdata(); the original code never stored it, so
+        * remove() would dereference a NULL pointer.
+        */
+       i2c_set_clientdata(client, sensor);
+
+       mpu3050_set_power_mode(client, 1);
+       msleep(10);
+
+       ret = i2c_smbus_read_byte_data(client, MPU3050_CHIP_ID_REG);
+       if (ret < 0) {
+               dev_err(&client->dev, "failed to detect device\n");
+               error = -ENXIO;
+               goto err_free_mem;
+       }
+
+       if (ret != MPU3050_CHIP_ID) {
+               dev_err(&client->dev, "unsupported chip id\n");
+               error = -ENXIO;
+               goto err_free_mem;
+       }
+
+       idev->name = "MPU3050";
+       idev->id.bustype = BUS_I2C;
+       idev->dev.parent = &client->dev;
+
+       idev->open = mpu3050_input_open;
+       idev->close = mpu3050_input_close;
+
+       __set_bit(EV_ABS, idev->evbit);
+       input_set_abs_params(idev, ABS_X,
+                            MPU3050_MIN_VALUE, MPU3050_MAX_VALUE, 0, 0);
+       input_set_abs_params(idev, ABS_Y,
+                            MPU3050_MIN_VALUE, MPU3050_MAX_VALUE, 0, 0);
+       input_set_abs_params(idev, ABS_Z,
+                            MPU3050_MIN_VALUE, MPU3050_MAX_VALUE, 0, 0);
+
+       input_set_drvdata(idev, sensor);
+
+       pm_runtime_set_active(&client->dev);
+
+       error = request_threaded_irq(client->irq,
+                                    NULL, mpu3050_interrupt_thread,
+                                    IRQF_TRIGGER_RISING,
+                                    "mpu_int", sensor);
+       if (error) {
+               dev_err(&client->dev,
+                       "can't get IRQ %d, error %d\n", client->irq, error);
+               goto err_pm_set_suspended;
+       }
+
+       error = input_register_device(idev);
+       if (error) {
+               dev_err(&client->dev, "failed to register input device\n");
+               goto err_free_irq;
+       }
+
+       pm_runtime_enable(&client->dev);
+       pm_runtime_set_autosuspend_delay(&client->dev, MPU3050_AUTO_DELAY);
+
+       return 0;
+
+err_free_irq:
+       free_irq(client->irq, sensor);
+err_pm_set_suspended:
+       pm_runtime_set_suspended(&client->dev);
+err_free_mem:
+       /*
+        * No path reaching this label has successfully registered the
+        * input device, so it must be released with input_free_device()
+        * (which also tolerates a NULL idev), not with
+        * input_unregister_device() as the original code did.
+        */
+       input_free_device(idev);
+       kfree(sensor);
+       return error;
+}
+
+/**
+ *     mpu3050_remove  -       remove a sensor
+ *     @client: i2c client of sensor being removed
+ *
+ *     Our sensor is going away, clean up the resources: disable runtime
+ *     PM, release the interrupt, unregister the input device (which also
+ *     drops its final reference) and free the driver state.
+ */
+static int __devexit mpu3050_remove(struct i2c_client *client)
+{
+       struct mpu3050_sensor *sensor = i2c_get_clientdata(client);
+
+       pm_runtime_disable(&client->dev);
+       pm_runtime_set_suspended(&client->dev);
+
+       free_irq(client->irq, sensor);
+       input_unregister_device(sensor->idev);
+       kfree(sensor);
+
+       return 0;
+}
+
+#ifdef CONFIG_PM
+/**
+ *     mpu3050_suspend         -       called on device suspend
+ *     @dev: device being suspended
+ *
+ *     Put the device into sleep mode before we suspend the machine.
+ *     Used for both system sleep and runtime suspend via
+ *     UNIVERSAL_DEV_PM_OPS below.
+ */
+static int mpu3050_suspend(struct device *dev)
+{
+       struct i2c_client *client = to_i2c_client(dev);
+
+       mpu3050_set_power_mode(client, 0);
+
+       return 0;
+}
+
+/**
+ *     mpu3050_resume          -       called on device resume
+ *     @dev: device being resumed
+ *
+ *     Put the device into powered mode on resume.
+ */
+static int mpu3050_resume(struct device *dev)
+{
+       struct i2c_client *client = to_i2c_client(dev);
+
+       mpu3050_set_power_mode(client, 1);
+       msleep(100);  /* wait for gyro chip resume */
+
+       return 0;
+}
+#endif
+
+static UNIVERSAL_DEV_PM_OPS(mpu3050_pm, mpu3050_suspend, mpu3050_resume, NULL);
+
+static const struct i2c_device_id mpu3050_ids[] = {
+       { "mpu3050", 0 },
+       { }
+};
+MODULE_DEVICE_TABLE(i2c, mpu3050_ids);
+
+static struct i2c_driver mpu3050_i2c_driver = {
+       .driver = {
+               .name   = "mpu3050",
+               .owner  = THIS_MODULE,
+               .pm     = &mpu3050_pm,
+       },
+       .probe          = mpu3050_probe,
+       .remove         = __devexit_p(mpu3050_remove),
+       .id_table       = mpu3050_ids,
+};
+
+static int __init mpu3050_init(void)
+{
+       return i2c_add_driver(&mpu3050_i2c_driver);
+}
+module_init(mpu3050_init);
+
+static void __exit mpu3050_exit(void)
+{
+       i2c_del_driver(&mpu3050_i2c_driver);
+}
+module_exit(mpu3050_exit);
+
+MODULE_AUTHOR("Wistron Corp.");
+MODULE_DESCRIPTION("MPU3050 Tri-axis gyroscope driver");
+MODULE_LICENSE("GPL");
index 62bae99..ad2e51c 100644 (file)
@@ -373,7 +373,7 @@ static struct xenbus_driver xenkbd_driver = {
 
 static int __init xenkbd_init(void)
 {
-       if (!xen_pv_domain())
+       if (!xen_domain())
                return -ENODEV;
 
        /* Nothing to do if running in dom0. */
index 7b6ce17..58902fb 100644 (file)
@@ -191,7 +191,7 @@ static void __exit gpio_mouse_exit(void)
 }
 module_exit(gpio_mouse_exit);
 
-MODULE_AUTHOR("Hans-Christian Egtvedt <hcegtvedt@atmel.com>");
+MODULE_AUTHOR("Hans-Christian Egtvedt <egtvedt@samfundet.no>");
 MODULE_DESCRIPTION("GPIO mouse driver");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:gpio_mouse"); /* work with hotplug and coldplug */
index c31ad11..83bcaba 100644 (file)
@@ -33,7 +33,7 @@ static const char *desired_serio_phys;
 static int lifebook_limit_serio3(const struct dmi_system_id *d)
 {
        desired_serio_phys = "isa0060/serio3";
-       return 0;
+       return 1;
 }
 
 static bool lifebook_use_6byte_proto;
@@ -41,7 +41,7 @@ static bool lifebook_use_6byte_proto;
 static int lifebook_set_6byte_proto(const struct dmi_system_id *d)
 {
        lifebook_use_6byte_proto = true;
-       return 0;
+       return 1;
 }
 
 static const struct dmi_system_id __initconst lifebook_dmi_table[] = {
index 943cfec..6c5d84f 100644 (file)
@@ -12,7 +12,6 @@
 
 #include <linux/init.h>
 #include <linux/input.h>
-#include <linux/version.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
index 1242775..2fc887a 100644 (file)
@@ -20,7 +20,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/input.h>
 #include <linux/ctype.h>
 #include <linux/libps2.h>
index e06e045..5538fc6 100644 (file)
@@ -207,27 +207,37 @@ static int synaptics_identify(struct psmouse *psmouse)
 static int synaptics_resolution(struct psmouse *psmouse)
 {
        struct synaptics_data *priv = psmouse->private;
-       unsigned char res[3];
-       unsigned char max[3];
+       unsigned char resp[3];
 
        if (SYN_ID_MAJOR(priv->identity) < 4)
                return 0;
 
-       if (synaptics_send_cmd(psmouse, SYN_QUE_RESOLUTION, res) == 0) {
-               if (res[0] != 0 && (res[1] & 0x80) && res[2] != 0) {
-                       priv->x_res = res[0]; /* x resolution in units/mm */
-                       priv->y_res = res[2]; /* y resolution in units/mm */
+       if (synaptics_send_cmd(psmouse, SYN_QUE_RESOLUTION, resp) == 0) {
+               if (resp[0] != 0 && (resp[1] & 0x80) && resp[2] != 0) {
+                       priv->x_res = resp[0]; /* x resolution in units/mm */
+                       priv->y_res = resp[2]; /* y resolution in units/mm */
                }
        }
 
        if (SYN_EXT_CAP_REQUESTS(priv->capabilities) >= 5 &&
            SYN_CAP_MAX_DIMENSIONS(priv->ext_cap_0c)) {
-               if (synaptics_send_cmd(psmouse, SYN_QUE_EXT_DIMENSIONS, max)) {
-                       printk(KERN_ERR "Synaptics claims to have dimensions query,"
-                              " but I'm not able to read it.\n");
+               if (synaptics_send_cmd(psmouse, SYN_QUE_EXT_MAX_COORDS, resp)) {
+                       printk(KERN_ERR "Synaptics claims to have max coordinates"
+                              " query, but I'm not able to read it.\n");
+               } else {
+                       priv->x_max = (resp[0] << 5) | ((resp[1] & 0x0f) << 1);
+                       priv->y_max = (resp[2] << 5) | ((resp[1] & 0xf0) >> 3);
+               }
+       }
+
+       if (SYN_EXT_CAP_REQUESTS(priv->capabilities) >= 7 &&
+           SYN_CAP_MIN_DIMENSIONS(priv->ext_cap_0c)) {
+               if (synaptics_send_cmd(psmouse, SYN_QUE_EXT_MIN_COORDS, resp)) {
+                       printk(KERN_ERR "Synaptics claims to have min coordinates"
+                              " query, but I'm not able to read it.\n");
                } else {
-                       priv->x_max = (max[0] << 5) | ((max[1] & 0x0f) << 1);
-                       priv->y_max = (max[2] << 5) | ((max[1] & 0xf0) >> 3);
+                       priv->x_min = (resp[0] << 5) | ((resp[1] & 0x0f) << 1);
+                       priv->y_min = (resp[2] << 5) | ((resp[1] & 0xf0) >> 3);
                }
        }
 
@@ -406,26 +416,10 @@ static int synaptics_parse_hw_state(const unsigned char buf[],
        memset(hw, 0, sizeof(struct synaptics_hw_state));
 
        if (SYN_MODEL_NEWABS(priv->model_id)) {
-               hw->x = (((buf[3] & 0x10) << 8) |
-                        ((buf[1] & 0x0f) << 8) |
-                        buf[4]);
-               hw->y = (((buf[3] & 0x20) << 7) |
-                        ((buf[1] & 0xf0) << 4) |
-                        buf[5]);
-
-               hw->z = buf[2];
                hw->w = (((buf[0] & 0x30) >> 2) |
                         ((buf[0] & 0x04) >> 1) |
                         ((buf[3] & 0x04) >> 2));
 
-               if (SYN_CAP_ADV_GESTURE(priv->ext_cap_0c) && hw->w == 2) {
-                       /* Gesture packet: (x, y, z) at half resolution */
-                       priv->mt.x = (((buf[4] & 0x0f) << 8) | buf[1]) << 1;
-                       priv->mt.y = (((buf[4] & 0xf0) << 4) | buf[2]) << 1;
-                       priv->mt.z = ((buf[3] & 0x30) | (buf[5] & 0x0f)) << 1;
-                       return 1;
-               }
-
                hw->left  = (buf[0] & 0x01) ? 1 : 0;
                hw->right = (buf[0] & 0x02) ? 1 : 0;
 
@@ -448,6 +442,22 @@ static int synaptics_parse_hw_state(const unsigned char buf[],
                        hw->down = ((buf[0] ^ buf[3]) & 0x02) ? 1 : 0;
                }
 
+               if (SYN_CAP_ADV_GESTURE(priv->ext_cap_0c) && hw->w == 2) {
+                       /* Gesture packet: (x, y, z) at half resolution */
+                       priv->mt.x = (((buf[4] & 0x0f) << 8) | buf[1]) << 1;
+                       priv->mt.y = (((buf[4] & 0xf0) << 4) | buf[2]) << 1;
+                       priv->mt.z = ((buf[3] & 0x30) | (buf[5] & 0x0f)) << 1;
+                       return 1;
+               }
+
+               hw->x = (((buf[3] & 0x10) << 8) |
+                        ((buf[1] & 0x0f) << 8) |
+                        buf[4]);
+               hw->y = (((buf[3] & 0x20) << 7) |
+                        ((buf[1] & 0xf0) << 4) |
+                        buf[5]);
+               hw->z = buf[2];
+
                if (SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap) &&
                    ((buf[0] ^ buf[3]) & 0x02)) {
                        switch (SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap) & ~0x01) {
@@ -485,7 +495,8 @@ static int synaptics_parse_hw_state(const unsigned char buf[],
        return 0;
 }
 
-static void set_slot(struct input_dev *dev, int slot, bool active, int x, int y)
+static void synaptics_report_semi_mt_slot(struct input_dev *dev, int slot,
+                                         bool active, int x, int y)
 {
        input_mt_slot(dev, slot);
        input_mt_report_slot_state(dev, MT_TOOL_FINGER, active);
@@ -502,14 +513,16 @@ static void synaptics_report_semi_mt_data(struct input_dev *dev,
                                          int num_fingers)
 {
        if (num_fingers >= 2) {
-               set_slot(dev, 0, true, min(a->x, b->x), min(a->y, b->y));
-               set_slot(dev, 1, true, max(a->x, b->x), max(a->y, b->y));
+               synaptics_report_semi_mt_slot(dev, 0, true, min(a->x, b->x),
+                                             min(a->y, b->y));
+               synaptics_report_semi_mt_slot(dev, 1, true, max(a->x, b->x),
+                                             max(a->y, b->y));
        } else if (num_fingers == 1) {
-               set_slot(dev, 0, true, a->x, a->y);
-               set_slot(dev, 1, false, 0, 0);
+               synaptics_report_semi_mt_slot(dev, 0, true, a->x, a->y);
+               synaptics_report_semi_mt_slot(dev, 1, false, 0, 0);
        } else {
-               set_slot(dev, 0, false, 0, 0);
-               set_slot(dev, 1, false, 0, 0);
+               synaptics_report_semi_mt_slot(dev, 0, false, 0, 0);
+               synaptics_report_semi_mt_slot(dev, 1, false, 0, 0);
        }
 }
 
@@ -684,23 +697,36 @@ static psmouse_ret_t synaptics_process_byte(struct psmouse *psmouse)
 static void set_input_params(struct input_dev *dev, struct synaptics_data *priv)
 {
        int i;
+       int fuzz = SYN_CAP_REDUCED_FILTERING(priv->ext_cap_0c) ?
+                       SYN_REDUCED_FILTER_FUZZ : 0;
 
        __set_bit(INPUT_PROP_POINTER, dev->propbit);
 
        __set_bit(EV_ABS, dev->evbit);
        input_set_abs_params(dev, ABS_X,
-                            XMIN_NOMINAL, priv->x_max ?: XMAX_NOMINAL, 0, 0);
+                            priv->x_min ?: XMIN_NOMINAL,
+                            priv->x_max ?: XMAX_NOMINAL,
+                            fuzz, 0);
        input_set_abs_params(dev, ABS_Y,
-                            YMIN_NOMINAL, priv->y_max ?: YMAX_NOMINAL, 0, 0);
+                            priv->y_min ?: YMIN_NOMINAL,
+                            priv->y_max ?: YMAX_NOMINAL,
+                            fuzz, 0);
        input_set_abs_params(dev, ABS_PRESSURE, 0, 255, 0, 0);
 
        if (SYN_CAP_ADV_GESTURE(priv->ext_cap_0c)) {
                __set_bit(INPUT_PROP_SEMI_MT, dev->propbit);
                input_mt_init_slots(dev, 2);
-               input_set_abs_params(dev, ABS_MT_POSITION_X, XMIN_NOMINAL,
-                                    priv->x_max ?: XMAX_NOMINAL, 0, 0);
-               input_set_abs_params(dev, ABS_MT_POSITION_Y, YMIN_NOMINAL,
-                                    priv->y_max ?: YMAX_NOMINAL, 0, 0);
+               input_set_abs_params(dev, ABS_MT_POSITION_X,
+                                    priv->x_min ?: XMIN_NOMINAL,
+                                    priv->x_max ?: XMAX_NOMINAL,
+                                    fuzz, 0);
+               input_set_abs_params(dev, ABS_MT_POSITION_Y,
+                                    priv->y_min ?: YMIN_NOMINAL,
+                                    priv->y_max ?: YMAX_NOMINAL,
+                                    fuzz, 0);
+
+               input_abs_set_res(dev, ABS_MT_POSITION_X, priv->x_res);
+               input_abs_set_res(dev, ABS_MT_POSITION_Y, priv->y_res);
        }
 
        if (SYN_CAP_PALMDETECT(priv->capabilities))
@@ -971,4 +997,3 @@ bool synaptics_supported(void)
 }
 
 #endif /* CONFIG_MOUSE_PS2_SYNAPTICS */
-
index 7453938..ca040aa 100644 (file)
@@ -19,7 +19,8 @@
 #define SYN_QUE_RESOLUTION             0x08
 #define SYN_QUE_EXT_CAPAB              0x09
 #define SYN_QUE_EXT_CAPAB_0C           0x0c
-#define SYN_QUE_EXT_DIMENSIONS         0x0d
+#define SYN_QUE_EXT_MAX_COORDS         0x0d
+#define SYN_QUE_EXT_MIN_COORDS         0x0f
 
 /* synatics modes */
 #define SYN_BIT_ABSOLUTE_MODE          (1 << 7)
  * 1   0x60    multifinger mode        identifies firmware finger counting
  *                                     (not reporting!) algorithm.
  *                                     Not particularly meaningful
- * 1   0x80    covered pad             W clipped to 14, 15 == pad mostly covered
- * 2   0x01    clickpad bit 1          2-button ClickPad
- * 2   0x02    deluxe LED controls     touchpad support LED commands
+ * 1   0x80    covered pad             W clipped to 14, 15 == pad mostly covered
+ * 2   0x01    clickpad bit 1          2-button ClickPad
+ * 2   0x02    deluxe LED controls     touchpad support LED commands
  *                                     ala multimedia control bar
  * 2   0x04    reduced filtering       firmware does less filtering on
  *                                     position data, driver should watch
  *                                     for noise.
+ * 2   0x20    report min              query 0x0f gives min coord reported
  */
 #define SYN_CAP_CLICKPAD(ex0c)         ((ex0c) & 0x100000) /* 1-button ClickPad */
 #define SYN_CAP_CLICKPAD2BTN(ex0c)     ((ex0c) & 0x000100) /* 2-button ClickPad */
 #define SYN_CAP_MAX_DIMENSIONS(ex0c)   ((ex0c) & 0x020000)
+#define SYN_CAP_MIN_DIMENSIONS(ex0c)   ((ex0c) & 0x002000)
 #define SYN_CAP_ADV_GESTURE(ex0c)      ((ex0c) & 0x080000)
+#define SYN_CAP_REDUCED_FILTERING(ex0c)        ((ex0c) & 0x000400)
 
 /* synaptics modes query bits */
 #define SYN_MODE_ABSOLUTE(m)           ((m) & (1 << 7))
 #define SYN_NEWABS_RELAXED             2
 #define SYN_OLDABS                     3
 
+/* amount to fuzz position data when touchpad reports reduced filtering */
+#define SYN_REDUCED_FILTER_FUZZ                8
+
 /*
  * A structure to describe the state of the touchpad hardware (buttons and pad)
  */
@@ -130,7 +137,8 @@ struct synaptics_data {
        unsigned long int ext_cap_0c;           /* Ext Caps from 0x0c query */
        unsigned long int identity;             /* Identification */
        unsigned int x_res, y_res;              /* X/Y resolution in units/mm */
-       unsigned int x_max, y_max;              /* Max dimensions (from FW) */
+       unsigned int x_max, y_max;              /* Max coordinates (from FW) */
+       unsigned int x_min, y_min;              /* Min coordinates (from FW) */
 
        unsigned char pkt_type;                 /* packet type - old, new, etc */
        unsigned char mode;                     /* current mode byte */
index 6ee8f0d..95280f9 100644 (file)
@@ -372,6 +372,6 @@ static void __exit psif_exit(void)
 module_init(psif_init);
 module_exit(psif_exit);
 
-MODULE_AUTHOR("Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>");
+MODULE_AUTHOR("Hans-Christian Egtvedt <egtvedt@samfundet.no>");
 MODULE_DESCRIPTION("Atmel AVR32 PSIF PS/2 driver");
 MODULE_LICENSE("GPL");
index 4220620..979c443 100644 (file)
@@ -795,7 +795,7 @@ int hp_sdc_release_cooked_irq(hp_sdc_irqhook *callback)
 
 /************************* Keepalive timer task *********************/
 
-void hp_sdc_kicker (unsigned long data)
+static void hp_sdc_kicker(unsigned long data)
 {
        tasklet_schedule(&hp_sdc.task);
        /* Re-insert the periodic task. */
index 0a619c5..6d89fd1 100644 (file)
        /* toolMode codes
         */
 #define AIPTEK_TOOL_BUTTON_PEN_MODE                    BTN_TOOL_PEN
-#define AIPTEK_TOOL_BUTTON_PEN_MODE                    BTN_TOOL_PEN
 #define AIPTEK_TOOL_BUTTON_PENCIL_MODE                 BTN_TOOL_PENCIL
 #define AIPTEK_TOOL_BUTTON_BRUSH_MODE                  BTN_TOOL_BRUSH
 #define AIPTEK_TOOL_BUTTON_AIRBRUSH_MODE               BTN_TOOL_AIRBRUSH
index 08ba5ad..03ebcc8 100644 (file)
@@ -15,6 +15,7 @@
 #include "wacom_wac.h"
 #include "wacom.h"
 #include <linux/input/mt.h>
+#include <linux/hid.h>
 
 /* resolution for penabled devices */
 #define WACOM_PL_RES           20
@@ -264,6 +265,7 @@ static int wacom_graphire_irq(struct wacom_wac *wacom)
                        wacom->id[0] = 0;
                input_report_abs(input, ABS_MISC, wacom->id[0]); /* report tool id */
                input_report_key(input, wacom->tool[0], prox);
+               input_event(input, EV_MSC, MSC_SERIAL, 1);
                input_sync(input); /* sync last event */
        }
 
@@ -273,11 +275,10 @@ static int wacom_graphire_irq(struct wacom_wac *wacom)
                prox = data[7] & 0xf8;
                if (prox || wacom->id[1]) {
                        wacom->id[1] = PAD_DEVICE_ID;
-                       input_report_key(input, BTN_0, (data[7] & 0x40));
-                       input_report_key(input, BTN_4, (data[7] & 0x80));
+                       input_report_key(input, BTN_BACK, (data[7] & 0x40));
+                       input_report_key(input, BTN_FORWARD, (data[7] & 0x80));
                        rw = ((data[7] & 0x18) >> 3) - ((data[7] & 0x20) >> 3);
                        input_report_rel(input, REL_WHEEL, rw);
-                       input_report_key(input, BTN_TOOL_FINGER, 0xf0);
                        if (!prox)
                                wacom->id[1] = 0;
                        input_report_abs(input, ABS_MISC, wacom->id[1]);
@@ -290,18 +291,17 @@ static int wacom_graphire_irq(struct wacom_wac *wacom)
                prox = (data[7] & 0xf8) || data[8];
                if (prox || wacom->id[1]) {
                        wacom->id[1] = PAD_DEVICE_ID;
-                       input_report_key(input, BTN_0, (data[7] & 0x08));
-                       input_report_key(input, BTN_1, (data[7] & 0x20));
-                       input_report_key(input, BTN_4, (data[7] & 0x10));
-                       input_report_key(input, BTN_5, (data[7] & 0x40));
+                       input_report_key(input, BTN_BACK, (data[7] & 0x08));
+                       input_report_key(input, BTN_LEFT, (data[7] & 0x20));
+                       input_report_key(input, BTN_FORWARD, (data[7] & 0x10));
+                       input_report_key(input, BTN_RIGHT, (data[7] & 0x40));
                        input_report_abs(input, ABS_WHEEL, (data[8] & 0x7f));
-                       input_report_key(input, BTN_TOOL_FINGER, 0xf0);
                        if (!prox)
                                wacom->id[1] = 0;
                        input_report_abs(input, ABS_MISC, wacom->id[1]);
                        input_event(input, EV_MSC, MSC_SERIAL, 0xf0);
+                       retval = 1;
                }
-               retval = 1;
                break;
        }
 exit:
@@ -494,10 +494,6 @@ static int wacom_intuos_irq(struct wacom_wac *wacom)
 
        /* pad packets. Works as a second tool and is always in prox */
        if (data[0] == WACOM_REPORT_INTUOSPAD) {
-               /* initiate the pad as a device */
-               if (wacom->tool[1] != BTN_TOOL_FINGER)
-                       wacom->tool[1] = BTN_TOOL_FINGER;
-
                if (features->type >= INTUOS4S && features->type <= INTUOS4L) {
                        input_report_key(input, BTN_0, (data[2] & 0x01));
                        input_report_key(input, BTN_1, (data[3] & 0x01));
@@ -1080,18 +1076,14 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev,
 
        switch (wacom_wac->features.type) {
        case WACOM_MO:
-               __set_bit(BTN_1, input_dev->keybit);
-               __set_bit(BTN_5, input_dev->keybit);
-
                input_set_abs_params(input_dev, ABS_WHEEL, 0, 71, 0, 0);
                /* fall through */
 
        case WACOM_G4:
                input_set_capability(input_dev, EV_MSC, MSC_SERIAL);
 
-               __set_bit(BTN_TOOL_FINGER, input_dev->keybit);
-               __set_bit(BTN_0, input_dev->keybit);
-               __set_bit(BTN_4, input_dev->keybit);
+               __set_bit(BTN_BACK, input_dev->keybit);
+               __set_bit(BTN_FORWARD, input_dev->keybit);
                /* fall through */
 
        case GRAPHIRE:
@@ -1127,10 +1119,12 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev,
        case CINTIQ:
                for (i = 0; i < 8; i++)
                        __set_bit(BTN_0 + i, input_dev->keybit);
-               __set_bit(BTN_TOOL_FINGER, input_dev->keybit);
 
-               input_set_abs_params(input_dev, ABS_RX, 0, 4096, 0, 0);
-               input_set_abs_params(input_dev, ABS_RY, 0, 4096, 0, 0);
+               if (wacom_wac->features.type != WACOM_21UX2) {
+                       input_set_abs_params(input_dev, ABS_RX, 0, 4096, 0, 0);
+                       input_set_abs_params(input_dev, ABS_RY, 0, 4096, 0, 0);
+               }
+
                input_set_abs_params(input_dev, ABS_Z, -900, 899, 0, 0);
                wacom_setup_cintiq(wacom_wac);
                break;
@@ -1151,8 +1145,6 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev,
                __set_bit(BTN_2, input_dev->keybit);
                __set_bit(BTN_3, input_dev->keybit);
 
-               __set_bit(BTN_TOOL_FINGER, input_dev->keybit);
-
                input_set_abs_params(input_dev, ABS_RX, 0, 4096, 0, 0);
                input_set_abs_params(input_dev, ABS_Z, -900, 899, 0, 0);
                /* fall through */
@@ -1170,7 +1162,6 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev,
        case INTUOS4S:
                for (i = 0; i < 7; i++)
                        __set_bit(BTN_0 + i, input_dev->keybit);
-               __set_bit(BTN_TOOL_FINGER, input_dev->keybit);
 
                input_set_abs_params(input_dev, ABS_Z, -900, 899, 0, 0);
                wacom_setup_intuos(wacom_wac);
@@ -1295,6 +1286,12 @@ static const struct wacom_features wacom_features_0x65 =
 static const struct wacom_features wacom_features_0x69 =
        { "Wacom Bamboo1",        WACOM_PKGLEN_GRAPHIRE,   5104,  3712,  511,
          63, GRAPHIRE, WACOM_PENPRTN_RES, WACOM_PENPRTN_RES };
+static const struct wacom_features wacom_features_0x6A =
+       { "Wacom Bamboo1 4x6",    WACOM_PKGLEN_GRAPHIRE,  14760,  9225, 1023,
+         63, GRAPHIRE, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
+static const struct wacom_features wacom_features_0x6B =
+       { "Wacom Bamboo1 5x8",    WACOM_PKGLEN_GRAPHIRE,  21648, 13530, 1023,
+         63, GRAPHIRE, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
 static const struct wacom_features wacom_features_0x20 =
        { "Wacom Intuos 4x5",     WACOM_PKGLEN_INTUOS,    12700, 10600, 1023,
          31, INTUOS, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
@@ -1427,6 +1424,9 @@ static const struct wacom_features wacom_features_0x90 =
 static const struct wacom_features wacom_features_0x93 =
        { "Wacom ISDv4 93",       WACOM_PKGLEN_GRAPHIRE,  26202, 16325,  255,
          0, TABLETPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
+static const struct wacom_features wacom_features_0x97 =
+       { "Wacom ISDv4 97",       WACOM_PKGLEN_GRAPHIRE,  26202, 16325,  511,
+         0, TABLETPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
 static const struct wacom_features wacom_features_0x9A =
        { "Wacom ISDv4 9A",       WACOM_PKGLEN_GRAPHIRE,  26202, 16325,  255,
          0, TABLETPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
@@ -1458,7 +1458,7 @@ static const struct wacom_features wacom_features_0xD3 =
        { "Wacom Bamboo 2FG 6x8", WACOM_PKGLEN_BBFUN,     21648, 13530, 1023,
          63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
 static const struct wacom_features wacom_features_0xD4 =
-       { "Wacom Bamboo Pen",     WACOM_PKGLEN_BBFUN,     14720,  9200,  255,
+       { "Wacom Bamboo Pen",     WACOM_PKGLEN_BBFUN,     14720,  9200, 1023,
          63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
 static const struct wacom_features wacom_features_0xD6 =
        { "Wacom BambooPT 2FG 4x5", WACOM_PKGLEN_BBFUN,   14720,  9200, 1023,
@@ -1483,6 +1483,11 @@ static const struct wacom_features wacom_features_0x6004 =
        USB_DEVICE(USB_VENDOR_ID_WACOM, prod),                  \
        .driver_info = (kernel_ulong_t)&wacom_features_##prod
 
+#define USB_DEVICE_DETAILED(prod, class, sub, proto)                   \
+       USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_WACOM, prod, class, \
+                                     sub, proto),                      \
+       .driver_info = (kernel_ulong_t)&wacom_features_##prod
+
 #define USB_DEVICE_LENOVO(prod)                                        \
        USB_DEVICE(USB_VENDOR_ID_LENOVO, prod),                 \
        .driver_info = (kernel_ulong_t)&wacom_features_##prod
@@ -1506,6 +1511,8 @@ const struct usb_device_id wacom_ids[] = {
        { USB_DEVICE_WACOM(0x64) },
        { USB_DEVICE_WACOM(0x65) },
        { USB_DEVICE_WACOM(0x69) },
+       { USB_DEVICE_WACOM(0x6A) },
+       { USB_DEVICE_WACOM(0x6B) },
        { USB_DEVICE_WACOM(0x20) },
        { USB_DEVICE_WACOM(0x21) },
        { USB_DEVICE_WACOM(0x22) },
@@ -1545,7 +1552,13 @@ const struct usb_device_id wacom_ids[] = {
        { USB_DEVICE_WACOM(0xC5) },
        { USB_DEVICE_WACOM(0xC6) },
        { USB_DEVICE_WACOM(0xC7) },
-       { USB_DEVICE_WACOM(0xCE) },
+       /*
+        * DTU-2231 has two interfaces on the same configuration,
+        * only one is used.
+        */
+       { USB_DEVICE_DETAILED(0xCE, USB_CLASS_HID,
+                             USB_INTERFACE_SUBCLASS_BOOT,
+                             USB_INTERFACE_PROTOCOL_MOUSE) },
        { USB_DEVICE_WACOM(0xD0) },
        { USB_DEVICE_WACOM(0xD1) },
        { USB_DEVICE_WACOM(0xD2) },
@@ -1560,6 +1573,7 @@ const struct usb_device_id wacom_ids[] = {
        { USB_DEVICE_WACOM(0xCC) },
        { USB_DEVICE_WACOM(0x90) },
        { USB_DEVICE_WACOM(0x93) },
+       { USB_DEVICE_WACOM(0x97) },
        { USB_DEVICE_WACOM(0x9A) },
        { USB_DEVICE_WACOM(0x9F) },
        { USB_DEVICE_WACOM(0xE2) },
index 5196861..d507b9b 100644 (file)
@@ -967,17 +967,12 @@ static int __devinit ads7846_setup_pendown(struct spi_device *spi, struct ads784
                ts->get_pendown_state = pdata->get_pendown_state;
        } else if (gpio_is_valid(pdata->gpio_pendown)) {
 
-               err = gpio_request(pdata->gpio_pendown, "ads7846_pendown");
+               err = gpio_request_one(pdata->gpio_pendown, GPIOF_IN,
+                                      "ads7846_pendown");
                if (err) {
-                       dev_err(&spi->dev, "failed to request pendown GPIO%d\n",
-                               pdata->gpio_pendown);
-                       return err;
-               }
-               err = gpio_direction_input(pdata->gpio_pendown);
-               if (err) {
-                       dev_err(&spi->dev, "failed to setup pendown GPIO%d\n",
-                               pdata->gpio_pendown);
-                       gpio_free(pdata->gpio_pendown);
+                       dev_err(&spi->dev,
+                               "failed to request/setup pendown GPIO%d: %d\n",
+                               pdata->gpio_pendown, err);
                        return err;
                }
 
index fa8e56b..8034cbb 100644 (file)
@@ -164,7 +164,7 @@ static irqreturn_t atmel_wm97xx_channel_b_interrupt(int irq, void *dev_id)
 
                data = ac97c_readl(atmel_wm97xx, CBRHR);
                value = data & 0x0fff;
-               source = data & WM97XX_ADCSRC_MASK;
+               source = data & WM97XX_ADCSEL_MASK;
                pen_down = (data & WM97XX_PEN_DOWN) >> 8;
 
                if (source == WM97XX_ADCSEL_X)
@@ -442,6 +442,6 @@ static void __exit atmel_wm97xx_exit(void)
 }
 module_exit(atmel_wm97xx_exit);
 
-MODULE_AUTHOR("Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>");
+MODULE_AUTHOR("Hans-Christian Egtvedt <egtvedt@samfundet.no>");
 MODULE_DESCRIPTION("wm97xx continuous touch driver for Atmel AT91 and AVR32");
 MODULE_LICENSE("GPL");
index 1e61387..ae00604 100644 (file)
 #define MXT_OBJECT_SIZE                6
 
 /* Object types */
-#define MXT_DEBUG_DIAGNOSTIC   37
-#define MXT_GEN_MESSAGE                5
-#define MXT_GEN_COMMAND                6
-#define MXT_GEN_POWER          7
-#define MXT_GEN_ACQUIRE                8
-#define MXT_TOUCH_MULTI                9
-#define MXT_TOUCH_KEYARRAY     15
-#define MXT_TOUCH_PROXIMITY    23
-#define MXT_PROCI_GRIPFACE     20
-#define MXT_PROCG_NOISE                22
-#define MXT_PROCI_ONETOUCH     24
-#define MXT_PROCI_TWOTOUCH     27
-#define MXT_PROCI_GRIP         40
-#define MXT_PROCI_PALM         41
-#define MXT_SPT_COMMSCONFIG    18
-#define MXT_SPT_GPIOPWM                19
-#define MXT_SPT_SELFTEST       25
-#define MXT_SPT_CTECONFIG      28
-#define MXT_SPT_USERDATA       38
-#define MXT_SPT_DIGITIZER      43
-#define MXT_SPT_MESSAGECOUNT   44
-
-/* MXT_GEN_COMMAND field */
+#define MXT_DEBUG_DIAGNOSTIC_T37       37
+#define MXT_GEN_MESSAGE_T5             5
+#define MXT_GEN_COMMAND_T6             6
+#define MXT_GEN_POWER_T7               7
+#define MXT_GEN_ACQUIRE_T8             8
+#define MXT_GEN_DATASOURCE_T53         53
+#define MXT_TOUCH_MULTI_T9             9
+#define MXT_TOUCH_KEYARRAY_T15         15
+#define MXT_TOUCH_PROXIMITY_T23                23
+#define MXT_TOUCH_PROXKEY_T52          52
+#define MXT_PROCI_GRIPFACE_T20         20
+#define MXT_PROCG_NOISE_T22            22
+#define MXT_PROCI_ONETOUCH_T24         24
+#define MXT_PROCI_TWOTOUCH_T27         27
+#define MXT_PROCI_GRIP_T40             40
+#define MXT_PROCI_PALM_T41             41
+#define MXT_PROCI_TOUCHSUPPRESSION_T42 42
+#define MXT_PROCI_STYLUS_T47           47
+#define MXT_PROCG_NOISESUPPRESSION_T48 48
+#define MXT_SPT_COMMSCONFIG_T18                18
+#define MXT_SPT_GPIOPWM_T19            19
+#define MXT_SPT_SELFTEST_T25           25
+#define MXT_SPT_CTECONFIG_T28          28
+#define MXT_SPT_USERDATA_T38           38
+#define MXT_SPT_DIGITIZER_T43          43
+#define MXT_SPT_MESSAGECOUNT_T44       44
+#define MXT_SPT_CTECONFIG_T46          46
+
+/* MXT_GEN_COMMAND_T6 field */
 #define MXT_COMMAND_RESET      0
 #define MXT_COMMAND_BACKUPNV   1
 #define MXT_COMMAND_CALIBRATE  2
 #define MXT_COMMAND_REPORTALL  3
 #define MXT_COMMAND_DIAGNOSTIC 5
 
-/* MXT_GEN_POWER field */
+/* MXT_GEN_POWER_T7 field */
 #define MXT_POWER_IDLEACQINT   0
 #define MXT_POWER_ACTVACQINT   1
 #define MXT_POWER_ACTV2IDLETO  2
 
-/* MXT_GEN_ACQUIRE field */
+/* MXT_GEN_ACQUIRE_T8 field */
 #define MXT_ACQUIRE_CHRGTIME   0
 #define MXT_ACQUIRE_TCHDRIFT   2
 #define MXT_ACQUIRE_DRIFTST    3
@@ -91,7 +97,7 @@
 #define MXT_ACQUIRE_ATCHCALST  6
 #define MXT_ACQUIRE_ATCHCALSTHR        7
 
-/* MXT_TOUCH_MULTI field */
+/* MXT_TOUCH_MULTI_T9 field */
 #define MXT_TOUCH_CTRL         0
 #define MXT_TOUCH_XORIGIN      1
 #define MXT_TOUCH_YORIGIN      2
 #define MXT_TOUCH_YEDGEDIST    29
 #define MXT_TOUCH_JUMPLIMIT    30
 
-/* MXT_PROCI_GRIPFACE field */
+/* MXT_PROCI_GRIPFACE_T20 field */
 #define MXT_GRIPFACE_CTRL      0
 #define MXT_GRIPFACE_XLOGRIP   1
 #define MXT_GRIPFACE_XHIGRIP   2
 #define MXT_NOISE_FREQ4                15
 #define MXT_NOISE_IDLEGCAFVALID        16
 
-/* MXT_SPT_COMMSCONFIG */
+/* MXT_SPT_COMMSCONFIG_T18 */
 #define MXT_COMMS_CTRL         0
 #define MXT_COMMS_CMD          1
 
-/* MXT_SPT_CTECONFIG field */
+/* MXT_SPT_CTECONFIG_T28 field */
 #define MXT_CTE_CTRL           0
 #define MXT_CTE_CMD            1
 #define MXT_CTE_MODE           2
 #define MXT_VOLTAGE_DEFAULT    2700000
 #define MXT_VOLTAGE_STEP       10000
 
-/* Define for MXT_GEN_COMMAND */
+/* Define for MXT_GEN_COMMAND_T6 */
 #define MXT_BOOT_VALUE         0xa5
 #define MXT_BACKUP_VALUE       0x55
 #define MXT_BACKUP_TIME                25      /* msec */
@@ -256,24 +262,31 @@ struct mxt_data {
 static bool mxt_object_readable(unsigned int type)
 {
        switch (type) {
-       case MXT_GEN_MESSAGE:
-       case MXT_GEN_COMMAND:
-       case MXT_GEN_POWER:
-       case MXT_GEN_ACQUIRE:
-       case MXT_TOUCH_MULTI:
-       case MXT_TOUCH_KEYARRAY:
-       case MXT_TOUCH_PROXIMITY:
-       case MXT_PROCI_GRIPFACE:
-       case MXT_PROCG_NOISE:
-       case MXT_PROCI_ONETOUCH:
-       case MXT_PROCI_TWOTOUCH:
-       case MXT_PROCI_GRIP:
-       case MXT_PROCI_PALM:
-       case MXT_SPT_COMMSCONFIG:
-       case MXT_SPT_GPIOPWM:
-       case MXT_SPT_SELFTEST:
-       case MXT_SPT_CTECONFIG:
-       case MXT_SPT_USERDATA:
+       case MXT_GEN_MESSAGE_T5:
+       case MXT_GEN_COMMAND_T6:
+       case MXT_GEN_POWER_T7:
+       case MXT_GEN_ACQUIRE_T8:
+       case MXT_GEN_DATASOURCE_T53:
+       case MXT_TOUCH_MULTI_T9:
+       case MXT_TOUCH_KEYARRAY_T15:
+       case MXT_TOUCH_PROXIMITY_T23:
+       case MXT_TOUCH_PROXKEY_T52:
+       case MXT_PROCI_GRIPFACE_T20:
+       case MXT_PROCG_NOISE_T22:
+       case MXT_PROCI_ONETOUCH_T24:
+       case MXT_PROCI_TWOTOUCH_T27:
+       case MXT_PROCI_GRIP_T40:
+       case MXT_PROCI_PALM_T41:
+       case MXT_PROCI_TOUCHSUPPRESSION_T42:
+       case MXT_PROCI_STYLUS_T47:
+       case MXT_PROCG_NOISESUPPRESSION_T48:
+       case MXT_SPT_COMMSCONFIG_T18:
+       case MXT_SPT_GPIOPWM_T19:
+       case MXT_SPT_SELFTEST_T25:
+       case MXT_SPT_CTECONFIG_T28:
+       case MXT_SPT_USERDATA_T38:
+       case MXT_SPT_DIGITIZER_T43:
+       case MXT_SPT_CTECONFIG_T46:
                return true;
        default:
                return false;
@@ -283,21 +296,28 @@ static bool mxt_object_readable(unsigned int type)
 static bool mxt_object_writable(unsigned int type)
 {
        switch (type) {
-       case MXT_GEN_COMMAND:
-       case MXT_GEN_POWER:
-       case MXT_GEN_ACQUIRE:
-       case MXT_TOUCH_MULTI:
-       case MXT_TOUCH_KEYARRAY:
-       case MXT_TOUCH_PROXIMITY:
-       case MXT_PROCI_GRIPFACE:
-       case MXT_PROCG_NOISE:
-       case MXT_PROCI_ONETOUCH:
-       case MXT_PROCI_TWOTOUCH:
-       case MXT_PROCI_GRIP:
-       case MXT_PROCI_PALM:
-       case MXT_SPT_GPIOPWM:
-       case MXT_SPT_SELFTEST:
-       case MXT_SPT_CTECONFIG:
+       case MXT_GEN_COMMAND_T6:
+       case MXT_GEN_POWER_T7:
+       case MXT_GEN_ACQUIRE_T8:
+       case MXT_TOUCH_MULTI_T9:
+       case MXT_TOUCH_KEYARRAY_T15:
+       case MXT_TOUCH_PROXIMITY_T23:
+       case MXT_TOUCH_PROXKEY_T52:
+       case MXT_PROCI_GRIPFACE_T20:
+       case MXT_PROCG_NOISE_T22:
+       case MXT_PROCI_ONETOUCH_T24:
+       case MXT_PROCI_TWOTOUCH_T27:
+       case MXT_PROCI_GRIP_T40:
+       case MXT_PROCI_PALM_T41:
+       case MXT_PROCI_TOUCHSUPPRESSION_T42:
+       case MXT_PROCI_STYLUS_T47:
+       case MXT_PROCG_NOISESUPPRESSION_T48:
+       case MXT_SPT_COMMSCONFIG_T18:
+       case MXT_SPT_GPIOPWM_T19:
+       case MXT_SPT_SELFTEST_T25:
+       case MXT_SPT_CTECONFIG_T28:
+       case MXT_SPT_DIGITIZER_T43:
+       case MXT_SPT_CTECONFIG_T46:
                return true;
        default:
                return false;
@@ -455,7 +475,7 @@ static int mxt_read_message(struct mxt_data *data,
        struct mxt_object *object;
        u16 reg;
 
-       object = mxt_get_object(data, MXT_GEN_MESSAGE);
+       object = mxt_get_object(data, MXT_GEN_MESSAGE_T5);
        if (!object)
                return -EINVAL;
 
@@ -597,8 +617,8 @@ static irqreturn_t mxt_interrupt(int irq, void *dev_id)
 
                reportid = message.reportid;
 
-               /* whether reportid is thing of MXT_TOUCH_MULTI */
-               object = mxt_get_object(data, MXT_TOUCH_MULTI);
+               /* whether reportid is thing of MXT_TOUCH_MULTI_T9 */
+               object = mxt_get_object(data, MXT_TOUCH_MULTI_T9);
                if (!object)
                        goto end;
 
@@ -635,7 +655,9 @@ static int mxt_check_reg_init(struct mxt_data *data)
                if (!mxt_object_writable(object->type))
                        continue;
 
-               for (j = 0; j < object->size + 1; j++) {
+               for (j = 0;
+                    j < (object->size + 1) * (object->instances + 1);
+                    j++) {
                        config_offset = index + j;
                        if (config_offset > pdata->config_length) {
                                dev_err(dev, "Not enough config data!\n");
@@ -644,7 +666,7 @@ static int mxt_check_reg_init(struct mxt_data *data)
                        mxt_write_object(data, object->type, j,
                                         pdata->config[config_offset]);
                }
-               index += object->size + 1;
+               index += (object->size + 1) * (object->instances + 1);
        }
 
        return 0;
@@ -678,31 +700,31 @@ static void mxt_handle_pdata(struct mxt_data *data)
        u8 voltage;
 
        /* Set touchscreen lines */
-       mxt_write_object(data, MXT_TOUCH_MULTI, MXT_TOUCH_XSIZE,
+       mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_XSIZE,
                        pdata->x_line);
-       mxt_write_object(data, MXT_TOUCH_MULTI, MXT_TOUCH_YSIZE,
+       mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_YSIZE,
                        pdata->y_line);
 
        /* Set touchscreen orient */
-       mxt_write_object(data, MXT_TOUCH_MULTI, MXT_TOUCH_ORIENT,
+       mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_ORIENT,
                        pdata->orient);
 
        /* Set touchscreen burst length */
-       mxt_write_object(data, MXT_TOUCH_MULTI,
+       mxt_write_object(data, MXT_TOUCH_MULTI_T9,
                        MXT_TOUCH_BLEN, pdata->blen);
 
        /* Set touchscreen threshold */
-       mxt_write_object(data, MXT_TOUCH_MULTI,
+       mxt_write_object(data, MXT_TOUCH_MULTI_T9,
                        MXT_TOUCH_TCHTHR, pdata->threshold);
 
        /* Set touchscreen resolution */
-       mxt_write_object(data, MXT_TOUCH_MULTI,
+       mxt_write_object(data, MXT_TOUCH_MULTI_T9,
                        MXT_TOUCH_XRANGE_LSB, (pdata->x_size - 1) & 0xff);
-       mxt_write_object(data, MXT_TOUCH_MULTI,
+       mxt_write_object(data, MXT_TOUCH_MULTI_T9,
                        MXT_TOUCH_XRANGE_MSB, (pdata->x_size - 1) >> 8);
-       mxt_write_object(data, MXT_TOUCH_MULTI,
+       mxt_write_object(data, MXT_TOUCH_MULTI_T9,
                        MXT_TOUCH_YRANGE_LSB, (pdata->y_size - 1) & 0xff);
-       mxt_write_object(data, MXT_TOUCH_MULTI,
+       mxt_write_object(data, MXT_TOUCH_MULTI_T9,
                        MXT_TOUCH_YRANGE_MSB, (pdata->y_size - 1) >> 8);
 
        /* Set touchscreen voltage */
@@ -715,7 +737,7 @@ static void mxt_handle_pdata(struct mxt_data *data)
                        voltage = (pdata->voltage - MXT_VOLTAGE_DEFAULT) /
                                MXT_VOLTAGE_STEP;
 
-               mxt_write_object(data, MXT_SPT_CTECONFIG,
+               mxt_write_object(data, MXT_SPT_CTECONFIG_T28,
                                MXT_CTE_VOLTAGE, voltage);
        }
 }
@@ -819,13 +841,13 @@ static int mxt_initialize(struct mxt_data *data)
        mxt_handle_pdata(data);
 
        /* Backup to memory */
-       mxt_write_object(data, MXT_GEN_COMMAND,
+       mxt_write_object(data, MXT_GEN_COMMAND_T6,
                        MXT_COMMAND_BACKUPNV,
                        MXT_BACKUP_VALUE);
        msleep(MXT_BACKUP_TIME);
 
        /* Soft reset */
-       mxt_write_object(data, MXT_GEN_COMMAND,
+       mxt_write_object(data, MXT_GEN_COMMAND_T6,
                        MXT_COMMAND_RESET, 1);
        msleep(MXT_RESET_TIME);
 
@@ -921,7 +943,7 @@ static int mxt_load_fw(struct device *dev, const char *fn)
        }
 
        /* Change to the bootloader mode */
-       mxt_write_object(data, MXT_GEN_COMMAND,
+       mxt_write_object(data, MXT_GEN_COMMAND_T6,
                        MXT_COMMAND_RESET, MXT_BOOT_VALUE);
        msleep(MXT_RESET_TIME);
 
@@ -1027,14 +1049,14 @@ static void mxt_start(struct mxt_data *data)
 {
        /* Touch enable */
        mxt_write_object(data,
-                       MXT_TOUCH_MULTI, MXT_TOUCH_CTRL, 0x83);
+                       MXT_TOUCH_MULTI_T9, MXT_TOUCH_CTRL, 0x83);
 }
 
 static void mxt_stop(struct mxt_data *data)
 {
        /* Touch disable */
        mxt_write_object(data,
-                       MXT_TOUCH_MULTI, MXT_TOUCH_CTRL, 0);
+                       MXT_TOUCH_MULTI_T9, MXT_TOUCH_CTRL, 0);
 }
 
 static int mxt_input_open(struct input_dev *dev)
@@ -1182,7 +1204,7 @@ static int mxt_resume(struct device *dev)
        struct input_dev *input_dev = data->input_dev;
 
        /* Soft reset */
-       mxt_write_object(data, MXT_GEN_COMMAND,
+       mxt_write_object(data, MXT_GEN_COMMAND_T6,
                        MXT_COMMAND_RESET, 1);
 
        msleep(MXT_RESET_TIME);
index a93c5c2..d8815c5 100644 (file)
@@ -84,9 +84,9 @@ static int cy8ctmg110_write_regs(struct cy8ctmg110 *tsc, unsigned char reg,
        memcpy(i2c_data + 1, value, len);
 
        ret = i2c_master_send(client, i2c_data, len + 1);
-       if (ret != 1) {
+       if (ret != len + 1) {
                dev_err(&client->dev, "i2c write data cmd failed\n");
-               return ret ? ret : -EIO;
+               return ret < 0 ? ret : -EIO;
        }
 
        return 0;
@@ -193,6 +193,8 @@ static int __devinit cy8ctmg110_probe(struct i2c_client *client,
 
        ts->client = client;
        ts->input = input_dev;
+       ts->reset_pin = pdata->reset_pin;
+       ts->irq_pin = pdata->irq_pin;
 
        snprintf(ts->phys, sizeof(ts->phys),
                 "%s/input0", dev_name(&client->dev));
@@ -328,7 +330,7 @@ static int __devexit cy8ctmg110_remove(struct i2c_client *client)
        return 0;
 }
 
-static struct i2c_device_id cy8ctmg110_idtable[] = {
+static const struct i2c_device_id cy8ctmg110_idtable[] = {
        { CY8CTMG110_DRIVER_NAME, 1 },
        { }
 };
index 66c96bf..3276952 100644 (file)
@@ -448,15 +448,11 @@ static int __devinit mrstouch_read_pmic_id(uint *vendor, uint *rev)
  */
 static int __devinit mrstouch_chan_parse(struct mrstouch_dev *tsdev)
 {
-       int err, i, found;
+       int found = 0;
+       int err, i;
        u8 r8;
 
-       found = -1;
-
        for (i = 0; i < MRSTOUCH_MAX_CHANNELS; i++) {
-               if (found >= 0)
-                       break;
-
                err = intel_scu_ipc_ioread8(PMICADDR0 + i, &r8);
                if (err)
                        return err;
@@ -466,16 +462,15 @@ static int __devinit mrstouch_chan_parse(struct mrstouch_dev *tsdev)
                        break;
                }
        }
-       if (found < 0)
-               return 0;
 
        if (tsdev->vendor == PMIC_VENDOR_FS) {
-               if (found && found > (MRSTOUCH_MAX_CHANNELS - 18))
+               if (found > MRSTOUCH_MAX_CHANNELS - 18)
                        return -ENOSPC;
        } else {
-               if (found && found > (MRSTOUCH_MAX_CHANNELS - 4))
+               if (found > MRSTOUCH_MAX_CHANNELS - 4)
                        return -ENOSPC;
        }
+
        return found;
 }
 
index 3242e70..e966c29 100644 (file)
@@ -157,9 +157,9 @@ static int wm97xx_acc_pen_down(struct wm97xx *wm)
                        x, y, p);
 
                /* are samples valid */
-               if ((x & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_X ||
-                   (y & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_Y ||
-                   (p & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_PRES)
+               if ((x & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_X ||
+                   (y & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_Y ||
+                   (p & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_PRES)
                        goto up;
 
                /* coordinate is good */
index 22a3411..089b0a0 100644 (file)
@@ -393,5 +393,5 @@ module_exit(tsc_exit);
 
 MODULE_AUTHOR("Cyril Chemparathy");
 MODULE_DESCRIPTION("TNETV107X Touchscreen Driver");
-MODULE_ALIAS("platform: tnetv107x-ts");
+MODULE_ALIAS("platform:tnetv107x-ts");
 MODULE_LICENSE("GPL");
index 98e6117..adc13a5 100644 (file)
@@ -215,8 +215,9 @@ static inline int is_pden(struct wm97xx *wm)
 static int wm9705_poll_sample(struct wm97xx *wm, int adcsel, int *sample)
 {
        int timeout = 5 * delay;
+       bool wants_pen = adcsel & WM97XX_PEN_DOWN;
 
-       if (!wm->pen_probably_down) {
+       if (wants_pen && !wm->pen_probably_down) {
                u16 data = wm97xx_reg_read(wm, AC97_WM97XX_DIGITISER_RD);
                if (!(data & WM97XX_PEN_DOWN))
                        return RC_PENUP;
@@ -224,13 +225,10 @@ static int wm9705_poll_sample(struct wm97xx *wm, int adcsel, int *sample)
        }
 
        /* set up digitiser */
-       if (adcsel & 0x8000)
-               adcsel = ((adcsel & 0x7fff) + 3) << 12;
-
        if (wm->mach_ops && wm->mach_ops->pre_sample)
                wm->mach_ops->pre_sample(adcsel);
-       wm97xx_reg_write(wm, AC97_WM97XX_DIGITISER1,
-                        adcsel | WM97XX_POLL | WM97XX_DELAY(delay));
+       wm97xx_reg_write(wm, AC97_WM97XX_DIGITISER1, (adcsel & WM97XX_ADCSEL_MASK)
+                               | WM97XX_POLL | WM97XX_DELAY(delay));
 
        /* wait 3 AC97 time slots + delay for conversion */
        poll_delay(delay);
@@ -256,13 +254,14 @@ static int wm9705_poll_sample(struct wm97xx *wm, int adcsel, int *sample)
                wm->mach_ops->post_sample(adcsel);
 
        /* check we have correct sample */
-       if ((*sample & WM97XX_ADCSEL_MASK) != adcsel) {
-               dev_dbg(wm->dev, "adc wrong sample, read %x got %x", adcsel,
-               *sample & WM97XX_ADCSEL_MASK);
+       if ((*sample ^ adcsel) & WM97XX_ADCSEL_MASK) {
+               dev_dbg(wm->dev, "adc wrong sample, wanted %x got %x",
+                       adcsel & WM97XX_ADCSEL_MASK,
+                       *sample & WM97XX_ADCSEL_MASK);
                return RC_PENUP;
        }
 
-       if (!(*sample & WM97XX_PEN_DOWN)) {
+       if (wants_pen && !(*sample & WM97XX_PEN_DOWN)) {
                wm->pen_probably_down = 0;
                return RC_PENUP;
        }
@@ -277,14 +276,14 @@ static int wm9705_poll_touch(struct wm97xx *wm, struct wm97xx_data *data)
 {
        int rc;
 
-       rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_X, &data->x);
+       rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_X | WM97XX_PEN_DOWN, &data->x);
        if (rc != RC_VALID)
                return rc;
-       rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_Y, &data->y);
+       rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_Y | WM97XX_PEN_DOWN, &data->y);
        if (rc != RC_VALID)
                return rc;
        if (pil) {
-               rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_PRES, &data->p);
+               rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_PRES | WM97XX_PEN_DOWN, &data->p);
                if (rc != RC_VALID)
                        return rc;
        } else
index 2bc2fb8..6e743e3 100644 (file)
@@ -255,8 +255,9 @@ static inline int is_pden(struct wm97xx *wm)
 static int wm9712_poll_sample(struct wm97xx *wm, int adcsel, int *sample)
 {
        int timeout = 5 * delay;
+       bool wants_pen = adcsel & WM97XX_PEN_DOWN;
 
-       if (!wm->pen_probably_down) {
+       if (wants_pen && !wm->pen_probably_down) {
                u16 data = wm97xx_reg_read(wm, AC97_WM97XX_DIGITISER_RD);
                if (!(data & WM97XX_PEN_DOWN))
                        return RC_PENUP;
@@ -264,13 +265,10 @@ static int wm9712_poll_sample(struct wm97xx *wm, int adcsel, int *sample)
        }
 
        /* set up digitiser */
-       if (adcsel & 0x8000)
-               adcsel = ((adcsel & 0x7fff) + 3) << 12;
-
        if (wm->mach_ops && wm->mach_ops->pre_sample)
                wm->mach_ops->pre_sample(adcsel);
-       wm97xx_reg_write(wm, AC97_WM97XX_DIGITISER1,
-                        adcsel | WM97XX_POLL | WM97XX_DELAY(delay));
+       wm97xx_reg_write(wm, AC97_WM97XX_DIGITISER1, (adcsel & WM97XX_ADCSEL_MASK)
+                               | WM97XX_POLL | WM97XX_DELAY(delay));
 
        /* wait 3 AC97 time slots + delay for conversion */
        poll_delay(delay);
@@ -296,13 +294,14 @@ static int wm9712_poll_sample(struct wm97xx *wm, int adcsel, int *sample)
                wm->mach_ops->post_sample(adcsel);
 
        /* check we have correct sample */
-       if ((*sample & WM97XX_ADCSEL_MASK) != adcsel) {
-               dev_dbg(wm->dev, "adc wrong sample, read %x got %x", adcsel,
-               *sample & WM97XX_ADCSEL_MASK);
+       if ((*sample ^ adcsel) & WM97XX_ADCSEL_MASK) {
+               dev_dbg(wm->dev, "adc wrong sample, wanted %x got %x",
+                       adcsel & WM97XX_ADCSEL_MASK,
+                       *sample & WM97XX_ADCSEL_MASK);
                return RC_PENUP;
        }
 
-       if (!(*sample & WM97XX_PEN_DOWN)) {
+       if (wants_pen && !(*sample & WM97XX_PEN_DOWN)) {
                wm->pen_probably_down = 0;
                return RC_PENUP;
        }
@@ -387,16 +386,18 @@ static int wm9712_poll_touch(struct wm97xx *wm, struct wm97xx_data *data)
                if (rc != RC_VALID)
                        return rc;
        } else {
-               rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_X, &data->x);
+               rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_X | WM97XX_PEN_DOWN,
+                                       &data->x);
                if (rc != RC_VALID)
                        return rc;
 
-               rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_Y, &data->y);
+               rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_Y | WM97XX_PEN_DOWN,
+                                       &data->y);
                if (rc != RC_VALID)
                        return rc;
 
                if (pil && !five_wire) {
-                       rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_PRES,
+                       rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_PRES | WM97XX_PEN_DOWN,
                                                &data->p);
                        if (rc != RC_VALID)
                                return rc;
index 73ec995..7405353 100644 (file)
@@ -261,8 +261,9 @@ static int wm9713_poll_sample(struct wm97xx *wm, int adcsel, int *sample)
 {
        u16 dig1;
        int timeout = 5 * delay;
+       bool wants_pen = adcsel & WM97XX_PEN_DOWN;
 
-       if (!wm->pen_probably_down) {
+       if (wants_pen && !wm->pen_probably_down) {
                u16 data = wm97xx_reg_read(wm, AC97_WM97XX_DIGITISER_RD);
                if (!(data & WM97XX_PEN_DOWN))
                        return RC_PENUP;
@@ -270,15 +271,14 @@ static int wm9713_poll_sample(struct wm97xx *wm, int adcsel, int *sample)
        }
 
        /* set up digitiser */
-       if (adcsel & 0x8000)
-               adcsel = 1 << ((adcsel & 0x7fff) + 3);
-
        dig1 = wm97xx_reg_read(wm, AC97_WM9713_DIG1);
        dig1 &= ~WM9713_ADCSEL_MASK;
+       /* WM97XX_ADCSEL_* channels need to be converted to WM9713 format */
+       dig1 |= 1 << ((adcsel & WM97XX_ADCSEL_MASK) >> 12);
 
        if (wm->mach_ops && wm->mach_ops->pre_sample)
                wm->mach_ops->pre_sample(adcsel);
-       wm97xx_reg_write(wm, AC97_WM9713_DIG1, dig1 | adcsel | WM9713_POLL);
+       wm97xx_reg_write(wm, AC97_WM9713_DIG1, dig1 | WM9713_POLL);
 
        /* wait 3 AC97 time slots + delay for conversion */
        poll_delay(delay);
@@ -304,13 +304,14 @@ static int wm9713_poll_sample(struct wm97xx *wm, int adcsel, int *sample)
                wm->mach_ops->post_sample(adcsel);
 
        /* check we have correct sample */
-       if ((*sample & WM97XX_ADCSRC_MASK) != ffs(adcsel >> 1) << 12) {
-               dev_dbg(wm->dev, "adc wrong sample, read %x got %x", adcsel,
-                       *sample & WM97XX_ADCSRC_MASK);
+       if ((*sample ^ adcsel) & WM97XX_ADCSEL_MASK) {
+               dev_dbg(wm->dev, "adc wrong sample, wanted %x got %x",
+                       adcsel & WM97XX_ADCSEL_MASK,
+                       *sample & WM97XX_ADCSEL_MASK);
                return RC_PENUP;
        }
 
-       if (!(*sample & WM97XX_PEN_DOWN)) {
+       if (wants_pen && !(*sample & WM97XX_PEN_DOWN)) {
                wm->pen_probably_down = 0;
                return RC_PENUP;
        }
@@ -400,14 +401,14 @@ static int wm9713_poll_touch(struct wm97xx *wm, struct wm97xx_data *data)
                if (rc != RC_VALID)
                        return rc;
        } else {
-               rc = wm9713_poll_sample(wm, WM9713_ADCSEL_X, &data->x);
+               rc = wm9713_poll_sample(wm, WM97XX_ADCSEL_X | WM97XX_PEN_DOWN, &data->x);
                if (rc != RC_VALID)
                        return rc;
-               rc = wm9713_poll_sample(wm, WM9713_ADCSEL_Y, &data->y);
+               rc = wm9713_poll_sample(wm, WM97XX_ADCSEL_Y | WM97XX_PEN_DOWN, &data->y);
                if (rc != RC_VALID)
                        return rc;
                if (pil) {
-                       rc = wm9713_poll_sample(wm, WM9713_ADCSEL_PRES,
+                       rc = wm9713_poll_sample(wm, WM97XX_ADCSEL_PRES | WM97XX_PEN_DOWN,
                                                &data->p);
                        if (rc != RC_VALID)
                                return rc;
index 5b0f15e..f6328c0 100644 (file)
@@ -122,9 +122,9 @@ static int wm97xx_acc_pen_down(struct wm97xx *wm)
                        x, y, p);
 
                /* are samples valid */
-               if ((x & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_X ||
-                   (y & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_Y ||
-                   (p & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_PRES)
+               if ((x & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_X ||
+                   (y & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_Y ||
+                   (p & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_PRES)
                        goto up;
 
                /* coordinate is good */
index 48e9cc0..1f73d7f 100644 (file)
@@ -2532,6 +2532,9 @@ static void _isdn_setup(struct net_device *dev)
 
        /* Setup the generic properties */
        dev->flags = IFF_NOARP|IFF_POINTOPOINT;
+
+       /* isdn prepends a header in the tx path, can't share skbs */
+       dev->priv_flags &= ~IFF_TX_SKB_SHARING;
        dev->header_ops = NULL;
        dev->netdev_ops = &isdn_netdev_ops;
 
index 574b09a..0dc6546 100644 (file)
@@ -29,7 +29,6 @@
 #include "md.h"
 #include "bitmap.h"
 
-#include <linux/dm-dirty-log.h>
 /* debug macros */
 
 #define DEBUG 0
@@ -775,10 +774,8 @@ static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned lon
  * 0 or page 1
  */
 static inline struct page *filemap_get_page(struct bitmap *bitmap,
-                                       unsigned long chunk)
+                                           unsigned long chunk)
 {
-       if (bitmap->filemap == NULL)
-               return NULL;
        if (file_page_index(bitmap, chunk) >= bitmap->file_pages)
                return NULL;
        return bitmap->filemap[file_page_index(bitmap, chunk)
@@ -878,28 +875,19 @@ enum bitmap_page_attr {
 static inline void set_page_attr(struct bitmap *bitmap, struct page *page,
                                enum bitmap_page_attr attr)
 {
-       if (page)
-               __set_bit((page->index<<2) + attr, bitmap->filemap_attr);
-       else
-               __set_bit(attr, &bitmap->logattrs);
+       __set_bit((page->index<<2) + attr, bitmap->filemap_attr);
 }
 
 static inline void clear_page_attr(struct bitmap *bitmap, struct page *page,
                                enum bitmap_page_attr attr)
 {
-       if (page)
-               __clear_bit((page->index<<2) + attr, bitmap->filemap_attr);
-       else
-               __clear_bit(attr, &bitmap->logattrs);
+       __clear_bit((page->index<<2) + attr, bitmap->filemap_attr);
 }
 
 static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page,
                                           enum bitmap_page_attr attr)
 {
-       if (page)
-               return test_bit((page->index<<2) + attr, bitmap->filemap_attr);
-       else
-               return test_bit(attr, &bitmap->logattrs);
+       return test_bit((page->index<<2) + attr, bitmap->filemap_attr);
 }
 
 /*
@@ -912,30 +900,26 @@ static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *p
 static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
 {
        unsigned long bit;
-       struct page *page = NULL;
+       struct page *page;
        void *kaddr;
        unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap);
 
-       if (!bitmap->filemap) {
-               struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log;
-               if (log)
-                       log->type->mark_region(log, chunk);
-       } else {
+       if (!bitmap->filemap)
+               return;
 
-               page = filemap_get_page(bitmap, chunk);
-               if (!page)
-                       return;
-               bit = file_page_offset(bitmap, chunk);
+       page = filemap_get_page(bitmap, chunk);
+       if (!page)
+               return;
+       bit = file_page_offset(bitmap, chunk);
 
-               /* set the bit */
-               kaddr = kmap_atomic(page, KM_USER0);
-               if (bitmap->flags & BITMAP_HOSTENDIAN)
-                       set_bit(bit, kaddr);
-               else
-                       __test_and_set_bit_le(bit, kaddr);
-               kunmap_atomic(kaddr, KM_USER0);
-               PRINTK("set file bit %lu page %lu\n", bit, page->index);
-       }
+       /* set the bit */
+       kaddr = kmap_atomic(page, KM_USER0);
+       if (bitmap->flags & BITMAP_HOSTENDIAN)
+               set_bit(bit, kaddr);
+       else
+               __set_bit_le(bit, kaddr);
+       kunmap_atomic(kaddr, KM_USER0);
+       PRINTK("set file bit %lu page %lu\n", bit, page->index);
        /* record page number so it gets flushed to disk when unplug occurs */
        set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
 }
@@ -952,16 +936,6 @@ void bitmap_unplug(struct bitmap *bitmap)
 
        if (!bitmap)
                return;
-       if (!bitmap->filemap) {
-               /* Must be using a dirty_log */
-               struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log;
-               dirty = test_and_clear_bit(BITMAP_PAGE_DIRTY, &bitmap->logattrs);
-               need_write = test_and_clear_bit(BITMAP_PAGE_NEEDWRITE, &bitmap->logattrs);
-               if (dirty || need_write)
-                       if (log->type->flush(log))
-                               bitmap->flags |= BITMAP_WRITE_ERROR;
-               goto out;
-       }
 
        /* look at each page to see if there are any set bits that need to be
         * flushed out to disk */
@@ -990,7 +964,6 @@ void bitmap_unplug(struct bitmap *bitmap)
                else
                        md_super_wait(bitmap->mddev);
        }
-out:
        if (bitmap->flags & BITMAP_WRITE_ERROR)
                bitmap_file_kick(bitmap);
 }
@@ -1199,7 +1172,6 @@ void bitmap_daemon_work(mddev_t *mddev)
        struct page *page = NULL, *lastpage = NULL;
        sector_t blocks;
        void *paddr;
-       struct dm_dirty_log *log = mddev->bitmap_info.log;
 
        /* Use a mutex to guard daemon_work against
         * bitmap_destroy.
@@ -1224,12 +1196,11 @@ void bitmap_daemon_work(mddev_t *mddev)
        spin_lock_irqsave(&bitmap->lock, flags);
        for (j = 0; j < bitmap->chunks; j++) {
                bitmap_counter_t *bmc;
-               if (!bitmap->filemap) {
-                       if (!log)
-                               /* error or shutdown */
-                               break;
-               } else
-                       page = filemap_get_page(bitmap, j);
+               if (!bitmap->filemap)
+                       /* error or shutdown */
+                       break;
+
+               page = filemap_get_page(bitmap, j);
 
                if (page != lastpage) {
                        /* skip this page unless it's marked as needing cleaning */
@@ -1298,17 +1269,16 @@ void bitmap_daemon_work(mddev_t *mddev)
                                                  -1);
 
                                /* clear the bit */
-                               if (page) {
-                                       paddr = kmap_atomic(page, KM_USER0);
-                                       if (bitmap->flags & BITMAP_HOSTENDIAN)
-                                               clear_bit(file_page_offset(bitmap, j),
-                                                         paddr);
-                                       else
-                                               __test_and_clear_bit_le(file_page_offset(bitmap, j),
-                                                              paddr);
-                                       kunmap_atomic(paddr, KM_USER0);
-                               } else
-                                       log->type->clear_region(log, j);
+                               paddr = kmap_atomic(page, KM_USER0);
+                               if (bitmap->flags & BITMAP_HOSTENDIAN)
+                                       clear_bit(file_page_offset(bitmap, j),
+                                                 paddr);
+                               else
+                                       __clear_bit_le(
+                                                       file_page_offset(bitmap,
+                                                                        j),
+                                                       paddr);
+                               kunmap_atomic(paddr, KM_USER0);
                        }
                } else
                        j |= PAGE_COUNTER_MASK;
@@ -1316,16 +1286,12 @@ void bitmap_daemon_work(mddev_t *mddev)
        spin_unlock_irqrestore(&bitmap->lock, flags);
 
        /* now sync the final page */
-       if (lastpage != NULL || log != NULL) {
+       if (lastpage != NULL) {
                spin_lock_irqsave(&bitmap->lock, flags);
                if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) {
                        clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
                        spin_unlock_irqrestore(&bitmap->lock, flags);
-                       if (lastpage)
-                               write_page(bitmap, lastpage, 0);
-                       else
-                               if (log->type->flush(log))
-                                       bitmap->flags |= BITMAP_WRITE_ERROR;
+                       write_page(bitmap, lastpage, 0);
                } else {
                        set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
                        spin_unlock_irqrestore(&bitmap->lock, flags);
@@ -1767,12 +1733,10 @@ int bitmap_create(mddev_t *mddev)
        BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
 
        if (!file
-           && !mddev->bitmap_info.offset
-           && !mddev->bitmap_info.log) /* bitmap disabled, nothing to do */
+           && !mddev->bitmap_info.offset) /* bitmap disabled, nothing to do */
                return 0;
 
        BUG_ON(file && mddev->bitmap_info.offset);
-       BUG_ON(mddev->bitmap_info.offset && mddev->bitmap_info.log);
 
        bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
        if (!bitmap)
@@ -1863,6 +1827,7 @@ int bitmap_create(mddev_t *mddev)
 int bitmap_load(mddev_t *mddev)
 {
        int err = 0;
+       sector_t start = 0;
        sector_t sector = 0;
        struct bitmap *bitmap = mddev->bitmap;
 
@@ -1881,24 +1846,14 @@ int bitmap_load(mddev_t *mddev)
        }
        bitmap_close_sync(bitmap);
 
-       if (mddev->bitmap_info.log) {
-               unsigned long i;
-               struct dm_dirty_log *log = mddev->bitmap_info.log;
-               for (i = 0; i < bitmap->chunks; i++)
-                       if (!log->type->in_sync(log, i, 1))
-                               bitmap_set_memory_bits(bitmap,
-                                                      (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap),
-                                                      1);
-       } else {
-               sector_t start = 0;
-               if (mddev->degraded == 0
-                   || bitmap->events_cleared == mddev->events)
-                       /* no need to keep dirty bits to optimise a
-                        * re-add of a missing device */
-                       start = mddev->recovery_cp;
-
-               err = bitmap_init_from_disk(bitmap, start);
-       }
+       if (mddev->degraded == 0
+           || bitmap->events_cleared == mddev->events)
+               /* no need to keep dirty bits to optimise a
+                * re-add of a missing device */
+               start = mddev->recovery_cp;
+
+       err = bitmap_init_from_disk(bitmap, start);
+
        if (err)
                goto out;
 
index b2a127e..a28f2e5 100644 (file)
@@ -212,10 +212,6 @@ struct bitmap {
        unsigned long file_pages; /* number of pages in the file */
        int last_page_size; /* bytes in the last page */
 
-       unsigned long logattrs; /* used when filemap_attr doesn't exist
-                                * because we are working with a dirty_log
-                                */
-
        unsigned long flags;
 
        int allclean;
@@ -237,7 +233,6 @@ struct bitmap {
        wait_queue_head_t behind_wait;
 
        struct sysfs_dirent *sysfs_can_clear;
-
 };
 
 /* the bitmap API */
index dfc9425..8e221a2 100644 (file)
@@ -215,6 +215,55 @@ struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
 }
 EXPORT_SYMBOL_GPL(bio_clone_mddev);
 
+void md_trim_bio(struct bio *bio, int offset, int size)
+{
+       /* 'bio' is a cloned bio which we need to trim to match
+        * the given offset and size.
+        * This requires adjusting bi_sector, bi_size, and bi_io_vec
+        */
+       int i;
+       struct bio_vec *bvec;
+       int sofar = 0;
+
+       size <<= 9;
+       if (offset == 0 && size == bio->bi_size)
+               return;
+
+       bio->bi_sector += offset;
+       bio->bi_size = size;
+       offset <<= 9;
+       clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+
+       while (bio->bi_idx < bio->bi_vcnt &&
+              bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
+               /* remove this whole bio_vec */
+               offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
+               bio->bi_idx++;
+       }
+       if (bio->bi_idx < bio->bi_vcnt) {
+               bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
+               bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
+       }
+       /* avoid any complications with bi_idx being non-zero*/
+       if (bio->bi_idx) {
+               memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
+                       (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
+               bio->bi_vcnt -= bio->bi_idx;
+               bio->bi_idx = 0;
+       }
+       /* Make sure vcnt and last bv are not too big */
+       bio_for_each_segment(bvec, bio, i) {
+               if (sofar + bvec->bv_len > size)
+                       bvec->bv_len = size - sofar;
+               if (bvec->bv_len == 0) {
+                       bio->bi_vcnt = i;
+                       break;
+               }
+               sofar += bvec->bv_len;
+       }
+}
+EXPORT_SYMBOL_GPL(md_trim_bio);
+
 /*
  * We have a system wide 'event count' that is incremented
  * on any 'interesting' event, and readers of /proc/mdstat
@@ -757,6 +806,10 @@ static void free_disk_sb(mdk_rdev_t * rdev)
                rdev->sb_start = 0;
                rdev->sectors = 0;
        }
+       if (rdev->bb_page) {
+               put_page(rdev->bb_page);
+               rdev->bb_page = NULL;
+       }
 }
 
 
@@ -1025,7 +1078,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
        ret = -EINVAL;
 
        bdevname(rdev->bdev, b);
-       sb = (mdp_super_t*)page_address(rdev->sb_page);
+       sb = page_address(rdev->sb_page);
 
        if (sb->md_magic != MD_SB_MAGIC) {
                printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
@@ -1054,6 +1107,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
        rdev->preferred_minor = sb->md_minor;
        rdev->data_offset = 0;
        rdev->sb_size = MD_SB_BYTES;
+       rdev->badblocks.shift = -1;
 
        if (sb->level == LEVEL_MULTIPATH)
                rdev->desc_nr = -1;
@@ -1064,7 +1118,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
                ret = 1;
        } else {
                __u64 ev1, ev2;
-               mdp_super_t *refsb = (mdp_super_t*)page_address(refdev->sb_page);
+               mdp_super_t *refsb = page_address(refdev->sb_page);
                if (!uuid_equal(refsb, sb)) {
                        printk(KERN_WARNING "md: %s has different UUID to %s\n",
                                b, bdevname(refdev->bdev,b2));
@@ -1099,7 +1153,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
 static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 {
        mdp_disk_t *desc;
-       mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page);
+       mdp_super_t *sb = page_address(rdev->sb_page);
        __u64 ev1 = md_event(sb);
 
        rdev->raid_disk = -1;
@@ -1230,7 +1284,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 
        rdev->sb_size = MD_SB_BYTES;
 
-       sb = (mdp_super_t*)page_address(rdev->sb_page);
+       sb = page_address(rdev->sb_page);
 
        memset(sb, 0, sizeof(*sb));
 
@@ -1395,6 +1449,8 @@ static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb)
        return cpu_to_le32(csum);
 }
 
+static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
+                           int acknowledged);
 static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
 {
        struct mdp_superblock_1 *sb;
@@ -1435,7 +1491,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
        if (ret) return ret;
 
 
-       sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
+       sb = page_address(rdev->sb_page);
 
        if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
            sb->major_version != cpu_to_le32(1) ||
@@ -1473,12 +1529,52 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
        else
                rdev->desc_nr = le32_to_cpu(sb->dev_number);
 
+       if (!rdev->bb_page) {
+               rdev->bb_page = alloc_page(GFP_KERNEL);
+               if (!rdev->bb_page)
+                       return -ENOMEM;
+       }
+       if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
+           rdev->badblocks.count == 0) {
+               /* need to load the bad block list.
+                * Currently we limit it to one page.
+                */
+               s32 offset;
+               sector_t bb_sector;
+               u64 *bbp;
+               int i;
+               int sectors = le16_to_cpu(sb->bblog_size);
+               if (sectors > (PAGE_SIZE / 512))
+                       return -EINVAL;
+               offset = le32_to_cpu(sb->bblog_offset);
+               if (offset == 0)
+                       return -EINVAL;
+               bb_sector = (long long)offset;
+               if (!sync_page_io(rdev, bb_sector, sectors << 9,
+                                 rdev->bb_page, READ, true))
+                       return -EIO;
+               bbp = (u64 *)page_address(rdev->bb_page);
+               rdev->badblocks.shift = sb->bblog_shift;
+               for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
+                       u64 bb = le64_to_cpu(*bbp);
+                       int count = bb & (0x3ff);
+                       u64 sector = bb >> 10;
+                       sector <<= sb->bblog_shift;
+                       count <<= sb->bblog_shift;
+                       if (bb + 1 == 0)
+                               break;
+                       if (md_set_badblocks(&rdev->badblocks,
+                                            sector, count, 1) == 0)
+                               return -EINVAL;
+               }
+       } else if (sb->bblog_offset == 0)
+               rdev->badblocks.shift = -1;
+
        if (!refdev) {
                ret = 1;
        } else {
                __u64 ev1, ev2;
-               struct mdp_superblock_1 *refsb = 
-                       (struct mdp_superblock_1*)page_address(refdev->sb_page);
+               struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
 
                if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
                    sb->level != refsb->level ||
@@ -1513,7 +1609,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
 
 static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 {
-       struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
+       struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
        __u64 ev1 = le64_to_cpu(sb->events);
 
        rdev->raid_disk = -1;
@@ -1619,13 +1715,12 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
        int max_dev, i;
        /* make rdev->sb match mddev and rdev data. */
 
-       sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
+       sb = page_address(rdev->sb_page);
 
        sb->feature_map = 0;
        sb->pad0 = 0;
        sb->recovery_offset = cpu_to_le64(0);
        memset(sb->pad1, 0, sizeof(sb->pad1));
-       memset(sb->pad2, 0, sizeof(sb->pad2));
        memset(sb->pad3, 0, sizeof(sb->pad3));
 
        sb->utime = cpu_to_le64((__u64)mddev->utime);
@@ -1665,6 +1760,40 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
                sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
        }
 
+       if (rdev->badblocks.count == 0)
+               /* Nothing to do for bad blocks */ ;
+       else if (sb->bblog_offset == 0)
+               /* Cannot record bad blocks on this device */
+               md_error(mddev, rdev);
+       else {
+               struct badblocks *bb = &rdev->badblocks;
+               u64 *bbp = (u64 *)page_address(rdev->bb_page);
+               u64 *p = bb->page;
+               sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
+               if (bb->changed) {
+                       unsigned seq;
+
+retry:
+                       seq = read_seqbegin(&bb->lock);
+
+                       memset(bbp, 0xff, PAGE_SIZE);
+
+                       for (i = 0 ; i < bb->count ; i++) {
+                               u64 internal_bb = *p++;
+                               u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
+                                               | BB_LEN(internal_bb));
+                               *bbp++ = cpu_to_le64(store_bb);
+                       }
+                       if (read_seqretry(&bb->lock, seq))
+                               goto retry;
+
+                       bb->sector = (rdev->sb_start +
+                                     (int)le32_to_cpu(sb->bblog_offset));
+                       bb->size = le16_to_cpu(sb->bblog_size);
+                       bb->changed = 0;
+               }
+       }
+
        max_dev = 0;
        list_for_each_entry(rdev2, &mddev->disks, same_set)
                if (rdev2->desc_nr+1 > max_dev)
@@ -1724,7 +1853,7 @@ super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
                        num_sectors = max_sectors;
                rdev->sb_start = sb_start;
        }
-       sb = (struct mdp_superblock_1 *) page_address(rdev->sb_page);
+       sb = page_address(rdev->sb_page);
        sb->data_size = cpu_to_le64(num_sectors);
        sb->super_offset = rdev->sb_start;
        sb->sb_csum = calc_sb_1_csum(sb);
@@ -1922,7 +2051,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
        bd_link_disk_holder(rdev->bdev, mddev->gendisk);
 
        /* May as well allow recovery to be retried once */
-       mddev->recovery_disabled = 0;
+       mddev->recovery_disabled++;
 
        return 0;
 
@@ -1953,6 +2082,9 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
        sysfs_remove_link(&rdev->kobj, "block");
        sysfs_put(rdev->sysfs_state);
        rdev->sysfs_state = NULL;
+       kfree(rdev->badblocks.page);
+       rdev->badblocks.count = 0;
+       rdev->badblocks.page = NULL;
        /* We need to delay this, otherwise we can deadlock when
         * writing to 'remove' to "dev/state".  We also need
         * to delay it due to rcu usage.
@@ -2127,10 +2259,10 @@ static void print_rdev(mdk_rdev_t *rdev, int major_version)
                printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version);
                switch (major_version) {
                case 0:
-                       print_sb_90((mdp_super_t*)page_address(rdev->sb_page));
+                       print_sb_90(page_address(rdev->sb_page));
                        break;
                case 1:
-                       print_sb_1((struct mdp_superblock_1 *)page_address(rdev->sb_page));
+                       print_sb_1(page_address(rdev->sb_page));
                        break;
                }
        } else
@@ -2194,6 +2326,7 @@ static void md_update_sb(mddev_t * mddev, int force_change)
        mdk_rdev_t *rdev;
        int sync_req;
        int nospares = 0;
+       int any_badblocks_changed = 0;
 
 repeat:
        /* First make sure individual recovery_offsets are correct */
@@ -2208,8 +2341,18 @@ repeat:
        if (!mddev->persistent) {
                clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
                clear_bit(MD_CHANGE_DEVS, &mddev->flags);
-               if (!mddev->external)
+               if (!mddev->external) {
                        clear_bit(MD_CHANGE_PENDING, &mddev->flags);
+                       list_for_each_entry(rdev, &mddev->disks, same_set) {
+                               if (rdev->badblocks.changed) {
+                                       md_ack_all_badblocks(&rdev->badblocks);
+                                       md_error(mddev, rdev);
+                               }
+                               clear_bit(Blocked, &rdev->flags);
+                               clear_bit(BlockedBadBlocks, &rdev->flags);
+                               wake_up(&rdev->blocked_wait);
+                       }
+               }
                wake_up(&mddev->sb_wait);
                return;
        }
@@ -2265,6 +2408,14 @@ repeat:
                MD_BUG();
                mddev->events --;
        }
+
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
+               if (rdev->badblocks.changed)
+                       any_badblocks_changed++;
+               if (test_bit(Faulty, &rdev->flags))
+                       set_bit(FaultRecorded, &rdev->flags);
+       }
+
        sync_sbs(mddev, nospares);
        spin_unlock_irq(&mddev->write_lock);
 
@@ -2290,6 +2441,13 @@ repeat:
                                bdevname(rdev->bdev,b),
                                (unsigned long long)rdev->sb_start);
                        rdev->sb_events = mddev->events;
+                       if (rdev->badblocks.size) {
+                               md_super_write(mddev, rdev,
+                                              rdev->badblocks.sector,
+                                              rdev->badblocks.size << 9,
+                                              rdev->bb_page);
+                               rdev->badblocks.size = 0;
+                       }
 
                } else
                        dprintk(")\n");
@@ -2313,6 +2471,15 @@ repeat:
        if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
                sysfs_notify(&mddev->kobj, NULL, "sync_completed");
 
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
+               if (test_and_clear_bit(FaultRecorded, &rdev->flags))
+                       clear_bit(Blocked, &rdev->flags);
+
+               if (any_badblocks_changed)
+                       md_ack_all_badblocks(&rdev->badblocks);
+               clear_bit(BlockedBadBlocks, &rdev->flags);
+               wake_up(&rdev->blocked_wait);
+       }
 }
 
 /* words written to sysfs files may, or may not, be \n terminated.
@@ -2347,7 +2514,8 @@ state_show(mdk_rdev_t *rdev, char *page)
        char *sep = "";
        size_t len = 0;
 
-       if (test_bit(Faulty, &rdev->flags)) {
+       if (test_bit(Faulty, &rdev->flags) ||
+           rdev->badblocks.unacked_exist) {
                len+= sprintf(page+len, "%sfaulty",sep);
                sep = ",";
        }
@@ -2359,7 +2527,8 @@ state_show(mdk_rdev_t *rdev, char *page)
                len += sprintf(page+len, "%swrite_mostly",sep);
                sep = ",";
        }
-       if (test_bit(Blocked, &rdev->flags)) {
+       if (test_bit(Blocked, &rdev->flags) ||
+           rdev->badblocks.unacked_exist) {
                len += sprintf(page+len, "%sblocked", sep);
                sep = ",";
        }
@@ -2368,6 +2537,10 @@ state_show(mdk_rdev_t *rdev, char *page)
                len += sprintf(page+len, "%sspare", sep);
                sep = ",";
        }
+       if (test_bit(WriteErrorSeen, &rdev->flags)) {
+               len += sprintf(page+len, "%swrite_error", sep);
+               sep = ",";
+       }
        return len+sprintf(page+len, "\n");
 }
 
@@ -2375,13 +2548,15 @@ static ssize_t
 state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 {
        /* can write
-        *  faulty  - simulates and error
+        *  faulty  - simulates an error
         *  remove  - disconnects the device
         *  writemostly - sets write_mostly
         *  -writemostly - clears write_mostly
-        *  blocked - sets the Blocked flag
-        *  -blocked - clears the Blocked flag
+        *  blocked - sets the Blocked flag
+        *  -blocked - clears the Blocked flag and possibly simulates an error
         *  insync - sets Insync providing device isn't active
+        *  write_error - sets WriteErrorSeen
+        *  -write_error - clears WriteErrorSeen
         */
        int err = -EINVAL;
        if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
@@ -2408,7 +2583,15 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
                set_bit(Blocked, &rdev->flags);
                err = 0;
        } else if (cmd_match(buf, "-blocked")) {
+               if (!test_bit(Faulty, &rdev->flags) &&
+                   test_bit(BlockedBadBlocks, &rdev->flags)) {
+                       /* metadata handler doesn't understand badblocks,
+                        * so we need to fail the device
+                        */
+                       md_error(rdev->mddev, rdev);
+               }
                clear_bit(Blocked, &rdev->flags);
+               clear_bit(BlockedBadBlocks, &rdev->flags);
                wake_up(&rdev->blocked_wait);
                set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
                md_wakeup_thread(rdev->mddev->thread);
@@ -2417,6 +2600,12 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
        } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
                set_bit(In_sync, &rdev->flags);
                err = 0;
+       } else if (cmd_match(buf, "write_error")) {
+               set_bit(WriteErrorSeen, &rdev->flags);
+               err = 0;
+       } else if (cmd_match(buf, "-write_error")) {
+               clear_bit(WriteErrorSeen, &rdev->flags);
+               err = 0;
        }
        if (!err)
                sysfs_notify_dirent_safe(rdev->sysfs_state);
@@ -2459,7 +2648,6 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 {
        char *e;
        int err;
-       char nm[20];
        int slot = simple_strtoul(buf, &e, 10);
        if (strncmp(buf, "none", 4)==0)
                slot = -1;
@@ -2482,8 +2670,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
                        hot_remove_disk(rdev->mddev, rdev->raid_disk);
                if (err)
                        return err;
-               sprintf(nm, "rd%d", rdev->raid_disk);
-               sysfs_remove_link(&rdev->mddev->kobj, nm);
+               sysfs_unlink_rdev(rdev->mddev, rdev);
                rdev->raid_disk = -1;
                set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
                md_wakeup_thread(rdev->mddev->thread);
@@ -2522,8 +2709,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
                        return err;
                } else
                        sysfs_notify_dirent_safe(rdev->sysfs_state);
-               sprintf(nm, "rd%d", rdev->raid_disk);
-               if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm))
+               if (sysfs_link_rdev(rdev->mddev, rdev))
                        /* failure here is OK */;
                /* don't wakeup anyone, leave that to userspace. */
        } else {
@@ -2712,6 +2898,39 @@ static ssize_t recovery_start_store(mdk_rdev_t *rdev, const char *buf, size_t le
 static struct rdev_sysfs_entry rdev_recovery_start =
 __ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
 
+
+static ssize_t
+badblocks_show(struct badblocks *bb, char *page, int unack);
+static ssize_t
+badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack);
+
+static ssize_t bb_show(mdk_rdev_t *rdev, char *page)
+{
+       return badblocks_show(&rdev->badblocks, page, 0);
+}
+static ssize_t bb_store(mdk_rdev_t *rdev, const char *page, size_t len)
+{
+       int rv = badblocks_store(&rdev->badblocks, page, len, 0);
+       /* Maybe that ack was all we needed */
+       if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
+               wake_up(&rdev->blocked_wait);
+       return rv;
+}
+static struct rdev_sysfs_entry rdev_bad_blocks =
+__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
+
+
+static ssize_t ubb_show(mdk_rdev_t *rdev, char *page)
+{
+       return badblocks_show(&rdev->badblocks, page, 1);
+}
+static ssize_t ubb_store(mdk_rdev_t *rdev, const char *page, size_t len)
+{
+       return badblocks_store(&rdev->badblocks, page, len, 1);
+}
+static struct rdev_sysfs_entry rdev_unack_bad_blocks =
+__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
+
 static struct attribute *rdev_default_attrs[] = {
        &rdev_state.attr,
        &rdev_errors.attr,
@@ -2719,6 +2938,8 @@ static struct attribute *rdev_default_attrs[] = {
        &rdev_offset.attr,
        &rdev_size.attr,
        &rdev_recovery_start.attr,
+       &rdev_bad_blocks.attr,
+       &rdev_unack_bad_blocks.attr,
        NULL,
 };
 static ssize_t
@@ -2782,7 +3003,7 @@ static struct kobj_type rdev_ktype = {
        .default_attrs  = rdev_default_attrs,
 };
 
-void md_rdev_init(mdk_rdev_t *rdev)
+int md_rdev_init(mdk_rdev_t *rdev)
 {
        rdev->desc_nr = -1;
        rdev->saved_raid_disk = -1;
@@ -2792,12 +3013,27 @@ void md_rdev_init(mdk_rdev_t *rdev)
        rdev->sb_events = 0;
        rdev->last_read_error.tv_sec  = 0;
        rdev->last_read_error.tv_nsec = 0;
+       rdev->sb_loaded = 0;
+       rdev->bb_page = NULL;
        atomic_set(&rdev->nr_pending, 0);
        atomic_set(&rdev->read_errors, 0);
        atomic_set(&rdev->corrected_errors, 0);
 
        INIT_LIST_HEAD(&rdev->same_set);
        init_waitqueue_head(&rdev->blocked_wait);
+
+       /* Add space to store bad block list.
+        * This reserves the space even on arrays where it cannot
+        * be used - I wonder if that matters
+        */
+       rdev->badblocks.count = 0;
+       rdev->badblocks.shift = 0;
+       rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL);
+       seqlock_init(&rdev->badblocks.lock);
+       if (rdev->badblocks.page == NULL)
+               return -ENOMEM;
+
+       return 0;
 }
 EXPORT_SYMBOL_GPL(md_rdev_init);
 /*
@@ -2823,8 +3059,11 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
                return ERR_PTR(-ENOMEM);
        }
 
-       md_rdev_init(rdev);
-       if ((err = alloc_disk_sb(rdev)))
+       err = md_rdev_init(rdev);
+       if (err)
+               goto abort_free;
+       err = alloc_disk_sb(rdev);
+       if (err)
                goto abort_free;
 
        err = lock_rdev(rdev, newdev, super_format == -2);
@@ -2860,15 +3099,17 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
                        goto abort_free;
                }
        }
+       if (super_format == -1)
+               /* hot-add for 0.90, or non-persistent: so no badblocks */
+               rdev->badblocks.shift = -1;
 
        return rdev;
 
 abort_free:
-       if (rdev->sb_page) {
-               if (rdev->bdev)
-                       unlock_rdev(rdev);
-               free_disk_sb(rdev);
-       }
+       if (rdev->bdev)
+               unlock_rdev(rdev);
+       free_disk_sb(rdev);
+       kfree(rdev->badblocks.page);
        kfree(rdev);
        return ERR_PTR(err);
 }
@@ -3149,15 +3390,13 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
        }
 
        list_for_each_entry(rdev, &mddev->disks, same_set) {
-               char nm[20];
                if (rdev->raid_disk < 0)
                        continue;
                if (rdev->new_raid_disk >= mddev->raid_disks)
                        rdev->new_raid_disk = -1;
                if (rdev->new_raid_disk == rdev->raid_disk)
                        continue;
-               sprintf(nm, "rd%d", rdev->raid_disk);
-               sysfs_remove_link(&mddev->kobj, nm);
+               sysfs_unlink_rdev(mddev, rdev);
        }
        list_for_each_entry(rdev, &mddev->disks, same_set) {
                if (rdev->raid_disk < 0)
@@ -3168,11 +3407,10 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
                if (rdev->raid_disk < 0)
                        clear_bit(In_sync, &rdev->flags);
                else {
-                       char nm[20];
-                       sprintf(nm, "rd%d", rdev->raid_disk);
-                       if(sysfs_create_link(&mddev->kobj, &rdev->kobj, nm))
-                               printk("md: cannot register %s for %s after level change\n",
-                                      nm, mdname(mddev));
+                       if (sysfs_link_rdev(mddev, rdev))
+                               printk(KERN_WARNING "md: cannot register rd%d"
+                                      " for %s after level change\n",
+                                      rdev->raid_disk, mdname(mddev));
                }
        }
 
@@ -4504,7 +4742,8 @@ int md_run(mddev_t *mddev)
        }
 
        if (mddev->bio_set == NULL)
-               mddev->bio_set = bioset_create(BIO_POOL_SIZE, sizeof(mddev));
+               mddev->bio_set = bioset_create(BIO_POOL_SIZE,
+                                              sizeof(mddev_t *));
 
        spin_lock(&pers_lock);
        pers = find_pers(mddev->level, mddev->clevel);
@@ -4621,12 +4860,9 @@ int md_run(mddev_t *mddev)
        smp_wmb();
        mddev->ready = 1;
        list_for_each_entry(rdev, &mddev->disks, same_set)
-               if (rdev->raid_disk >= 0) {
-                       char nm[20];
-                       sprintf(nm, "rd%d", rdev->raid_disk);
-                       if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm))
+               if (rdev->raid_disk >= 0)
+                       if (sysfs_link_rdev(mddev, rdev))
                                /* failure here is OK */;
-               }
        
        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        
@@ -4854,11 +5090,8 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
                sysfs_notify_dirent_safe(mddev->sysfs_state);
 
                list_for_each_entry(rdev, &mddev->disks, same_set)
-                       if (rdev->raid_disk >= 0) {
-                               char nm[20];
-                               sprintf(nm, "rd%d", rdev->raid_disk);
-                               sysfs_remove_link(&mddev->kobj, nm);
-                       }
+                       if (rdev->raid_disk >= 0)
+                               sysfs_unlink_rdev(mddev, rdev);
 
                set_capacity(disk, 0);
                mutex_unlock(&mddev->open_mutex);
@@ -6198,18 +6431,7 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
        if (!rdev || test_bit(Faulty, &rdev->flags))
                return;
 
-       if (mddev->external)
-               set_bit(Blocked, &rdev->flags);
-/*
-       dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n",
-               mdname(mddev),
-               MAJOR(rdev->bdev->bd_dev), MINOR(rdev->bdev->bd_dev),
-               __builtin_return_address(0),__builtin_return_address(1),
-               __builtin_return_address(2),__builtin_return_address(3));
-*/
-       if (!mddev->pers)
-               return;
-       if (!mddev->pers->error_handler)
+       if (!mddev->pers || !mddev->pers->error_handler)
                return;
        mddev->pers->error_handler(mddev,rdev);
        if (mddev->degraded)
@@ -6933,11 +7155,14 @@ void md_do_sync(mddev_t *mddev)
                        atomic_add(sectors, &mddev->recovery_active);
                }
 
+               if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+                       break;
+
                j += sectors;
                if (j>1) mddev->curr_resync = j;
                mddev->curr_mark_cnt = io_sectors;
                if (last_check == 0)
-                       /* this is the earliers that rebuilt will be
+                       /* this is the earliest that rebuild will be
                         * visible in /proc/mdstat
                         */
                        md_new_event(mddev);
@@ -6946,10 +7171,6 @@ void md_do_sync(mddev_t *mddev)
                        continue;
 
                last_check = io_sectors;
-
-               if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
-                       break;
-
        repeat:
                if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
                        /* step marks */
@@ -7067,29 +7288,23 @@ static int remove_and_add_spares(mddev_t *mddev)
                    atomic_read(&rdev->nr_pending)==0) {
                        if (mddev->pers->hot_remove_disk(
                                    mddev, rdev->raid_disk)==0) {
-                               char nm[20];
-                               sprintf(nm,"rd%d", rdev->raid_disk);
-                               sysfs_remove_link(&mddev->kobj, nm);
+                               sysfs_unlink_rdev(mddev, rdev);
                                rdev->raid_disk = -1;
                        }
                }
 
-       if (mddev->degraded && !mddev->recovery_disabled) {
+       if (mddev->degraded) {
                list_for_each_entry(rdev, &mddev->disks, same_set) {
                        if (rdev->raid_disk >= 0 &&
                            !test_bit(In_sync, &rdev->flags) &&
-                           !test_bit(Faulty, &rdev->flags) &&
-                           !test_bit(Blocked, &rdev->flags))
+                           !test_bit(Faulty, &rdev->flags))
                                spares++;
                        if (rdev->raid_disk < 0
                            && !test_bit(Faulty, &rdev->flags)) {
                                rdev->recovery_offset = 0;
                                if (mddev->pers->
                                    hot_add_disk(mddev, rdev) == 0) {
-                                       char nm[20];
-                                       sprintf(nm, "rd%d", rdev->raid_disk);
-                                       if (sysfs_create_link(&mddev->kobj,
-                                                             &rdev->kobj, nm))
+                                       if (sysfs_link_rdev(mddev, rdev))
                                                /* failure here is OK */;
                                        spares++;
                                        md_new_event(mddev);
@@ -7138,6 +7353,8 @@ static void reap_sync_thread(mddev_t *mddev)
        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        sysfs_notify_dirent_safe(mddev->sysfs_action);
        md_new_event(mddev);
+       if (mddev->event_work.func)
+               queue_work(md_misc_wq, &mddev->event_work);
 }
 
 /*
@@ -7170,9 +7387,6 @@ void md_check_recovery(mddev_t *mddev)
        if (mddev->bitmap)
                bitmap_daemon_work(mddev);
 
-       if (mddev->ro)
-               return;
-
        if (signal_pending(current)) {
                if (mddev->pers->sync_request && !mddev->external) {
                        printk(KERN_INFO "md: %s in immediate safe mode\n",
@@ -7209,9 +7423,7 @@ void md_check_recovery(mddev_t *mddev)
                                    atomic_read(&rdev->nr_pending)==0) {
                                        if (mddev->pers->hot_remove_disk(
                                                    mddev, rdev->raid_disk)==0) {
-                                               char nm[20];
-                                               sprintf(nm,"rd%d", rdev->raid_disk);
-                                               sysfs_remove_link(&mddev->kobj, nm);
+                                               sysfs_unlink_rdev(mddev, rdev);
                                                rdev->raid_disk = -1;
                                        }
                                }
@@ -7331,12 +7543,499 @@ void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
 {
        sysfs_notify_dirent_safe(rdev->sysfs_state);
        wait_event_timeout(rdev->blocked_wait,
-                          !test_bit(Blocked, &rdev->flags),
+                          !test_bit(Blocked, &rdev->flags) &&
+                          !test_bit(BlockedBadBlocks, &rdev->flags),
                           msecs_to_jiffies(5000));
        rdev_dec_pending(rdev, mddev);
 }
 EXPORT_SYMBOL(md_wait_for_blocked_rdev);
 
+
+/* Bad block management.
+ * We can record which blocks on each device are 'bad' and so just
+ * fail those blocks, or that stripe, rather than the whole device.
+ * Entries in the bad-block table are 64bits wide.  This comprises:
+ * Length of bad-range, in sectors: 0-511 for lengths 1-512
+ * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
+ *  A 'shift' can be set so that larger blocks are tracked and
+ *  consequently larger devices can be covered.
+ * 'Acknowledged' flag - 1 bit. - the most significant bit.
+ *
+ * Locking of the bad-block table uses a seqlock so md_is_badblock
+ * might need to retry if it is very unlucky.
+ * We will sometimes want to check for bad blocks in a bi_end_io function,
+ * so we use the write_seqlock_irq variant.
+ *
+ * When looking for a bad block we specify a range and want to
+ * know if any block in the range is bad.  So we binary-search
+ * to the last range that starts at-or-before the given endpoint,
+ * (or "before the sector after the target range")
+ * then see if it ends after the given start.
+ * We return
+ *  0 if there are no known bad blocks in the range
+ *  1 if there are known bad blocks which are all acknowledged
+ * -1 if there are bad blocks which have not yet been acknowledged in metadata.
+ * plus the start/length of the first bad section we overlap.
+ */
+int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
+                  sector_t *first_bad, int *bad_sectors)
+{
+       int hi;
+       int lo = 0;
+       u64 *p = bb->page;
+       int rv = 0;
+       sector_t target = s + sectors;
+       unsigned seq;
+
+       if (bb->shift > 0) {
+               /* round the start down, and the end up */
+               s >>= bb->shift;
+               target += (1<<bb->shift) - 1;
+               target >>= bb->shift;
+               sectors = target - s;
+       }
+       /* 'target' is now the first block after the bad range */
+
+retry:
+       /* lockless read of the table; retried below if a writer raced */
+       seq = read_seqbegin(&bb->lock);
+
+       hi = bb->count;
+
+       /* Binary search between lo and hi for 'target'
+        * i.e. for the last range that starts before 'target'
+        */
+       /* INVARIANT: ranges before 'lo' and at-or-after 'hi'
+        * are known not to be the last range before target.
+        * VARIANT: hi-lo is the number of possible
+        * ranges, and decreases until it reaches 1
+        */
+       while (hi - lo > 1) {
+               int mid = (lo + hi) / 2;
+               sector_t a = BB_OFFSET(p[mid]);
+               if (a < target)
+                       /* This could still be the one, earlier ranges
+                        * could not. */
+                       lo = mid;
+               else
+                       /* This and later ranges are definitely out. */
+                       hi = mid;
+       }
+       /* 'lo' might be the last that started before target, but 'hi' isn't */
+       if (hi > lo) {
+               /* need to check all range that end after 's' to see if
+                * any are unacknowledged.
+                */
+               while (lo >= 0 &&
+                      BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
+                       if (BB_OFFSET(p[lo]) < target) {
+                               /* starts before the end, and finishes after
+                                * the start, so they must overlap
+                                */
+                               if (rv != -1 && BB_ACK(p[lo]))
+                                       rv = 1;
+                               else
+                                       rv = -1;
+                               /* walking backwards, so the last write wins:
+                                * we report the overlap closest to 's'
+                                */
+                               *first_bad = BB_OFFSET(p[lo]);
+                               *bad_sectors = BB_LEN(p[lo]);
+                       }
+                       lo--;
+               }
+       }
+
+       /* a writer changed the table while we were reading it: retry */
+       if (read_seqretry(&bb->lock, seq))
+               goto retry;
+
+       return rv;
+}
+EXPORT_SYMBOL_GPL(md_is_badblock);
+
+/*
+ * Add a range of bad blocks to the table.
+ * This might extend the table, or might contract it
+ * if two adjacent ranges can be merged.
+ * We binary-search to find the 'insertion' point, then
+ * decide how best to handle it.
+ */
+/* Insert the range [s, s+sectors) into the sorted bad-block table,
+ * merging with neighbouring entries where possible.
+ * Returns 1 on success, 0 if the range could not be (fully) recorded
+ * because badblocks are disabled or the table is full.
+ */
+static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
+                           int acknowledged)
+{
+       u64 *p;
+       int lo, hi;
+       int rv = 1;
+
+       if (bb->shift < 0)
+               /* badblocks are disabled */
+               return 0;
+
+       if (bb->shift) {
+               /* round the start down, and the end up */
+               sector_t next = s + sectors;
+               s >>= bb->shift;
+               next += (1<<bb->shift) - 1;
+               next >>= bb->shift;
+               sectors = next - s;
+       }
+
+       write_seqlock_irq(&bb->lock);
+
+       p = bb->page;
+       lo = 0;
+       hi = bb->count;
+       /* Find the last range that starts at-or-before 's' */
+       while (hi - lo > 1) {
+               int mid = (lo + hi) / 2;
+               sector_t a = BB_OFFSET(p[mid]);
+               if (a <= s)
+                       lo = mid;
+               else
+                       hi = mid;
+       }
+       if (hi > lo && BB_OFFSET(p[lo]) > s)
+               hi = lo;
+
+       if (hi > lo) {
+               /* we found a range that might merge with the start
+                * of our new range
+                */
+               sector_t a = BB_OFFSET(p[lo]);
+               sector_t e = a + BB_LEN(p[lo]);
+               int ack = BB_ACK(p[lo]);
+               if (e >= s) {
+                       /* Yes, we can merge with a previous range */
+                       if (s == a && s + sectors >= e)
+                               /* new range covers old */
+                               ack = acknowledged;
+                       else
+                               ack = ack && acknowledged;
+
+                       if (e < s + sectors)
+                               e = s + sectors;
+                       if (e - a <= BB_MAX_LEN) {
+                               p[lo] = BB_MAKE(a, e-a, ack);
+                               s = e;
+                       } else {
+                               /* does not all fit in one range,
+                                * make p[lo] maximal
+                                */
+                               if (BB_LEN(p[lo]) != BB_MAX_LEN)
+                                       p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
+                               s = a + BB_MAX_LEN;
+                       }
+                       sectors = e - s;
+               }
+       }
+       if (sectors && hi < bb->count) {
+               /* 'hi' points to the first range that starts after 's'.
+                * Maybe we can merge with the start of that range */
+               sector_t a = BB_OFFSET(p[hi]);
+               sector_t e = a + BB_LEN(p[hi]);
+               int ack = BB_ACK(p[hi]);
+               if (a <= s + sectors) {
+                       /* merging is possible */
+                       if (e <= s + sectors) {
+                               /* full overlap */
+                               e = s + sectors;
+                               ack = acknowledged;
+                       } else
+                               ack = ack && acknowledged;
+
+                       a = s;
+                       if (e - a <= BB_MAX_LEN) {
+                               p[hi] = BB_MAKE(a, e-a, ack);
+                               s = e;
+                       } else {
+                               p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
+                               s = a + BB_MAX_LEN;
+                       }
+                       sectors = e - s;
+                       lo = hi;
+                       hi++;
+               }
+       }
+       if (sectors == 0 && hi < bb->count) {
+               /* we might be able to combine lo and hi */
+               /* Note: 's' is at the end of 'lo' */
+               sector_t a = BB_OFFSET(p[hi]);
+               int lolen = BB_LEN(p[lo]);
+               int hilen = BB_LEN(p[hi]);
+               int newlen = lolen + hilen - (s - a);
+               if (s >= a && newlen < BB_MAX_LEN) {
+                       /* yes, we can combine them */
+                       int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
+                       p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
+                       memmove(p + hi, p + hi + 1,
+                               (bb->count - hi - 1) * 8);
+                       bb->count--;
+               }
+       }
+       while (sectors) {
+               /* didn't merge (it all).
+                * Need to add a range just before 'hi' */
+               if (bb->count >= MD_MAX_BADBLOCKS) {
+                       /* No room for more */
+                       rv = 0;
+                       break;
+               } else {
+                       int this_sectors = sectors;
+                       memmove(p + hi + 1, p + hi,
+                               (bb->count - hi) * 8);
+                       bb->count++;
+
+                       if (this_sectors > BB_MAX_LEN)
+                               this_sectors = BB_MAX_LEN;
+                       p[hi] = BB_MAKE(s, this_sectors, acknowledged);
+                       sectors -= this_sectors;
+                       s += this_sectors;
+                       /* Keep the table sorted: when the range needs
+                        * more than one entry (> BB_MAX_LEN sectors),
+                        * the next chunk must be inserted *after* the
+                        * one just written, or the chunks end up in
+                        * descending order and break the binary search.
+                        */
+                       hi++;
+               }
+       }
+
+       bb->changed = 1;
+       if (!acknowledged)
+               bb->unacked_exist = 1;
+       write_sequnlock_irq(&bb->lock);
+
+       return rv;
+}
+
+/* Record a range of bad blocks on @rdev.  @s is relative to the start
+ * of the data area; it is translated to a device-relative sector here.
+ * Returns 1 if the range was recorded, 0 if it could not be.
+ */
+int rdev_set_badblocks(mdk_rdev_t *rdev, sector_t s, int sectors,
+                      int acknowledged)
+{
+       int rv = md_set_badblocks(&rdev->badblocks,
+                                 s + rdev->data_offset, sectors, acknowledged);
+       if (rv) {
+               /* Make sure they get written out promptly */
+               set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
+               md_wakeup_thread(rdev->mddev->thread);
+       }
+       return rv;
+}
+EXPORT_SYMBOL_GPL(rdev_set_badblocks);
+
+/*
+ * Remove a range of bad blocks from the table.
+ * This may involve extending the table if we split a region,
+ * but it must not fail.  So if the table becomes full, we just
+ * drop the remove request.
+ */
+static int md_clear_badblocks(struct badblocks *bb, sector_t s, int sectors)
+{
+       u64 *p;
+       int lo, hi;
+       sector_t target = s + sectors;
+       int rv = 0;
+       /* NOTE(review): rv is 0 here and is also set to 0 on the
+        * table-full bailout below, so success and failure currently
+        * return the same value -- confirm callers never need to tell
+        * the two apart.
+        */
+
+       if (bb->shift > 0) {
+               /* When clearing we round the start up and the end down.
+                * This should not matter as the shift should align with
+                * the block size and no rounding should ever be needed.
+                * However it is better to think a block is bad when it
+                * isn't than to think a block is not bad when it is.
+                */
+               s += (1<<bb->shift) - 1;
+               s >>= bb->shift;
+               target >>= bb->shift;
+               sectors = target - s;
+       }
+
+       write_seqlock_irq(&bb->lock);
+
+       p = bb->page;
+       lo = 0;
+       hi = bb->count;
+       /* Find the last range that starts before 'target' */
+       while (hi - lo > 1) {
+               int mid = (lo + hi) / 2;
+               sector_t a = BB_OFFSET(p[mid]);
+               if (a < target)
+                       lo = mid;
+               else
+                       hi = mid;
+       }
+       if (hi > lo) {
+               /* p[lo] is the last range that could overlap the
+                * current range.  Earlier ranges could also overlap,
+                * but only this one can overlap the end of the range.
+                */
+               if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) {
+                       /* Partial overlap, leave the tail of this range */
+                       int ack = BB_ACK(p[lo]);
+                       sector_t a = BB_OFFSET(p[lo]);
+                       sector_t end = a + BB_LEN(p[lo]);
+
+                       if (a < s) {
+                               /* we need to split this range */
+                               if (bb->count >= MD_MAX_BADBLOCKS) {
+                                       rv = 0;
+                                       goto out;
+                               }
+                               memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
+                               bb->count++;
+                               p[lo] = BB_MAKE(a, s-a, ack);
+                               lo++;
+                       }
+                       p[lo] = BB_MAKE(target, end - target, ack);
+                       /* there is no longer an overlap */
+                       hi = lo;
+                       lo--;
+               }
+               while (lo >= 0 &&
+                      BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
+                       /* This range does overlap */
+                       if (BB_OFFSET(p[lo]) < s) {
+                               /* Keep the early parts of this range. */
+                               int ack = BB_ACK(p[lo]);
+                               sector_t start = BB_OFFSET(p[lo]);
+                               p[lo] = BB_MAKE(start, s - start, ack);
+                               /* now lo doesn't overlap, so.. */
+                               break;
+                       }
+                       lo--;
+               }
+               /* 'lo' is strictly before, 'hi' is strictly after,
+                * anything between needs to be discarded
+                */
+               if (hi - lo > 1) {
+                       memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
+                       bb->count -= (hi - lo - 1);
+               }
+       }
+
+       bb->changed = 1;
+out:
+       write_sequnlock_irq(&bb->lock);
+       return rv;
+}
+
+/* Clear a range of bad blocks on @rdev.  @s is relative to the start
+ * of the data area and is translated to a device-relative sector here.
+ */
+int rdev_clear_badblocks(mdk_rdev_t *rdev, sector_t s, int sectors)
+{
+       return md_clear_badblocks(&rdev->badblocks,
+                                 s + rdev->data_offset,
+                                 sectors);
+}
+EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
+
+/*
+ * Acknowledge all bad blocks in a list.
+ * This only succeeds if ->changed is clear.  It is used by
+ * in-kernel metadata updates
+ */
+void md_ack_all_badblocks(struct badblocks *bb)
+{
+       if (bb->page == NULL || bb->changed)
+               /* no point even trying */
+               return;
+       write_seqlock_irq(&bb->lock);
+
+       /* re-check 'changed' now that we hold the lock: the lockless
+        * test above may have raced with a concurrent update
+        */
+       if (bb->changed == 0) {
+               u64 *p = bb->page;
+               int i;
+               for (i = 0; i < bb->count ; i++) {
+                       if (!BB_ACK(p[i])) {
+                               sector_t start = BB_OFFSET(p[i]);
+                               int len = BB_LEN(p[i]);
+                               /* rewrite the entry with the ack bit set */
+                               p[i] = BB_MAKE(start, len, 1);
+                       }
+               }
+               bb->unacked_exist = 0;
+       }
+       write_sequnlock_irq(&bb->lock);
+}
+EXPORT_SYMBOL_GPL(md_ack_all_badblocks);
+
+/* sysfs access to bad-blocks list.
+ * We present two files.
+ * 'bad-blocks' lists sector numbers and lengths of ranges that
+ *    are recorded as bad.  The list is truncated to fit within
+ *    the one-page limit of sysfs.
+ *    Writing "sector length" to this file adds an acknowledged
+ *    bad block list.
+ * 'unacknowledged-bad-blocks' lists bad blocks that have not yet
+ *    been acknowledged.  Writing to this file adds bad blocks
+ *    without acknowledging them.  This is largely for testing.
+ */
+
+static ssize_t
+badblocks_show(struct badblocks *bb, char *page, int unack)
+{
+       size_t len;
+       int i;
+       u64 *p = bb->page;
+       unsigned seq;
+
+       if (bb->shift < 0)
+               /* badblocks are disabled */
+               return 0;
+
+retry:
+       seq = read_seqbegin(&bb->lock);
+
+       len = 0;
+       i = 0;
+
+       while (len < PAGE_SIZE && i < bb->count) {
+               sector_t s = BB_OFFSET(p[i]);
+               unsigned int length = BB_LEN(p[i]);
+               int ack = BB_ACK(p[i]);
+               i++;
+
+               /* in 'unack' mode list only the unacknowledged entries */
+               if (unack && ack)
+                       continue;
+
+               /* report in sectors, undoing the stored shift */
+               len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
+                               (unsigned long long)s << bb->shift,
+                               length << bb->shift);
+       }
+       /* NOTE(review): this writes unacked_exist while holding only
+        * the read side of the seqlock -- confirm a racing writer
+        * setting it cannot be lost.
+        */
+       if (unack && len == 0)
+               bb->unacked_exist = 0;
+
+       if (read_seqretry(&bb->lock, seq))
+               goto retry;
+
+       return len;
+}
+
+#define DO_DEBUG 1
+
+static ssize_t
+badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack)
+{
+       unsigned long long sector;
+       int length;
+       char newline;
+#ifdef DO_DEBUG
+       /* Allow clearing via sysfs *only* for testing/debugging.
+        * Normally only a successful write may clear a badblock
+        */
+       int clear = 0;
+       if (page[0] == '-') {
+               clear = 1;
+               page++;
+       }
+#endif /* DO_DEBUG */
+
+       /* Expect "sector length" with an optional trailing newline */
+       switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
+       case 3:
+               if (newline != '\n')
+                       return -EINVAL;
+               /* fall through - still need to validate the length */
+       case 2:
+               if (length <= 0)
+                       return -EINVAL;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+#ifdef DO_DEBUG
+       if (clear) {
+               md_clear_badblocks(bb, sector, length);
+               return len;
+       }
+#endif /* DO_DEBUG */
+       if (md_set_badblocks(bb, sector, length, !unack))
+               return len;
+       else
+               return -ENOSPC;
+}
+
 static int md_notify_reboot(struct notifier_block *this,
                            unsigned long code, void *x)
 {
index 1c26c7a..1e586bb 100644 (file)
 typedef struct mddev_s mddev_t;
 typedef struct mdk_rdev_s mdk_rdev_t;
 
+/* Bad block numbers are stored sorted in a single page.
+ * 64bits is used for each block or extent.
+ * 54 bits are sector number, 9 bits are extent size,
+ * 1 bit is an 'acknowledged' flag.
+ */
+#define MD_MAX_BADBLOCKS       (PAGE_SIZE/8)
+
 /*
  * MD's 'extended' device
  */
@@ -48,7 +55,7 @@ struct mdk_rdev_s
        struct block_device *meta_bdev;
        struct block_device *bdev;      /* block device handle */
 
-       struct page     *sb_page;
+       struct page     *sb_page, *bb_page;
        int             sb_loaded;
        __u64           sb_events;
        sector_t        data_offset;    /* start of data in array */
@@ -74,9 +81,29 @@ struct mdk_rdev_s
 #define        In_sync         2               /* device is in_sync with rest of array */
 #define        WriteMostly     4               /* Avoid reading if at all possible */
 #define        AutoDetected    7               /* added by auto-detect */
-#define Blocked                8               /* An error occurred on an externally
-                                        * managed array, don't allow writes
+#define Blocked                8               /* An error occurred but has not yet
+                                        * been acknowledged by the metadata
+                                        * handler, so don't allow writes
                                         * until it is cleared */
+#define WriteErrorSeen 9               /* A write error has been seen on this
+                                        * device
+                                        */
+#define FaultRecorded  10              /* Intermediate state for clearing
+                                        * Blocked.  The Fault is/will-be
+                                        * recorded in the metadata, but that
+                                        * metadata hasn't been stored safely
+                                        * on disk yet.
+                                        */
+#define BlockedBadBlocks 11            /* A writer is blocked because they
+                                        * found an unacknowledged bad-block.
+                                        * This can safely be cleared at any
+                                        * time, and the writer will re-check.
+                                        * It may be set at any time, and at
+                                        * worst the writer will timeout and
+                                        * re-check.  So setting it as
+                                        * accurately as possible is good, but
+                                        * not absolutely critical.
+                                        */
        wait_queue_head_t blocked_wait;
 
        int desc_nr;                    /* descriptor index in the superblock */
@@ -111,8 +138,54 @@ struct mdk_rdev_s
 
        struct sysfs_dirent *sysfs_state; /* handle for 'state'
                                           * sysfs entry */
+
+       struct badblocks {
+               int     count;          /* count of bad blocks */
+               int     unacked_exist;  /* there probably are unacknowledged
+                                        * bad blocks.  This is only cleared
+                                        * when a read discovers none
+                                        */
+               int     shift;          /* shift from sectors to block size
+                                        * a -ve shift means badblocks are
+                                        * disabled.*/
+               u64     *page;          /* badblock list */
+               int     changed;
+               seqlock_t lock;
+
+               sector_t sector;
+               sector_t size;          /* in sectors */
+       } badblocks;
 };
 
+#define BB_LEN_MASK    (0x00000000000001FFULL)
+#define BB_OFFSET_MASK (0x7FFFFFFFFFFFFE00ULL)
+#define BB_ACK_MASK    (0x8000000000000000ULL)
+#define BB_MAX_LEN     512
+#define BB_OFFSET(x)   (((x) & BB_OFFSET_MASK) >> 9)
+#define BB_LEN(x)      (((x) & BB_LEN_MASK) + 1)
+#define BB_ACK(x)      (!!((x) & BB_ACK_MASK))
+#define BB_MAKE(a, l, ack) (((a)<<9) | ((l)-1) | ((u64)(!!(ack)) << 63))
+
+extern int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
+                         sector_t *first_bad, int *bad_sectors);
+/* Check whether any sector in [s, s+sectors) (relative to the data
+ * area) is known-bad on @rdev.  Fast path when the table is empty.
+ * Return value and *first_bad/*bad_sectors are as for md_is_badblock,
+ * with *first_bad translated back to a data-area-relative sector.
+ */
+static inline int is_badblock(mdk_rdev_t *rdev, sector_t s, int sectors,
+                             sector_t *first_bad, int *bad_sectors)
+{
+       if (unlikely(rdev->badblocks.count)) {
+               int rv = md_is_badblock(&rdev->badblocks, rdev->data_offset + s,
+                                       sectors,
+                                       first_bad, bad_sectors);
+               if (rv)
+                       *first_bad -= rdev->data_offset;
+               return rv;
+       }
+       return 0;
+}
+extern int rdev_set_badblocks(mdk_rdev_t *rdev, sector_t s, int sectors,
+                             int acknowledged);
+extern int rdev_clear_badblocks(mdk_rdev_t *rdev, sector_t s, int sectors);
+extern void md_ack_all_badblocks(struct badblocks *bb);
+
 struct mddev_s
 {
        void                            *private;
@@ -239,9 +312,12 @@ struct mddev_s
 #define        MD_RECOVERY_FROZEN      9
 
        unsigned long                   recovery;
-       int                             recovery_disabled; /* if we detect that recovery
-                                                           * will always fail, set this
-                                                           * so we don't loop trying */
+       /* If a RAID personality determines that recovery (of a particular
+        * device) will fail due to a read error on the source device, it
+        * takes a copy of this number and does not attempt recovery again
+        * until this number changes.
+        */
+       int                             recovery_disabled;
 
        int                             in_sync;        /* know to not need resync */
        /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
@@ -304,11 +380,6 @@ struct mddev_s
                                                         * hot-adding a bitmap.  It should
                                                         * eventually be settable by sysfs.
                                                         */
-               /* When md is serving under dm, it might use a
-                * dirty_log to store the bits.
-                */
-               struct dm_dirty_log *log;
-
                struct mutex            mutex;
                unsigned long           chunksize;
                unsigned long           daemon_sleep; /* how many jiffies between updates? */
@@ -413,6 +484,20 @@ static inline char * mdname (mddev_t * mddev)
        return mddev->gendisk ? mddev->gendisk->disk_name : "mdX";
 }
 
+/* Create the 'rd%d' sysfs symlink for @rdev under the array's kobject.
+ * Returns 0 on success or the sysfs_create_link() error.
+ */
+static inline int sysfs_link_rdev(mddev_t *mddev, mdk_rdev_t *rdev)
+{
+       char nm[20];
+       sprintf(nm, "rd%d", rdev->raid_disk);
+       return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
+}
+
+/* Remove the 'rd%d' sysfs symlink for @rdev from the array's kobject. */
+static inline void sysfs_unlink_rdev(mddev_t *mddev, mdk_rdev_t *rdev)
+{
+       char nm[20];
+       sprintf(nm, "rd%d", rdev->raid_disk);
+       sysfs_remove_link(&mddev->kobj, nm);
+}
+
 /*
  * iterates through some rdev ringlist. It's safe to remove the
  * current 'rdev'. Dont touch 'tmp' though.
@@ -505,7 +590,7 @@ extern void mddev_init(mddev_t *mddev);
 extern int md_run(mddev_t *mddev);
 extern void md_stop(mddev_t *mddev);
 extern void md_stop_writes(mddev_t *mddev);
-extern void md_rdev_init(mdk_rdev_t *rdev);
+extern int md_rdev_init(mdk_rdev_t *rdev);
 
 extern void mddev_suspend(mddev_t *mddev);
 extern void mddev_resume(mddev_t *mddev);
@@ -514,4 +599,5 @@ extern struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
 extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
                                   mddev_t *mddev);
 extern int mddev_check_plugged(mddev_t *mddev);
+extern void md_trim_bio(struct bio *bio, int offset, int size);
 #endif /* _MD_MD_H */
index f7431b6..32323f0 100644 (file)
 #include <linux/delay.h>
 #include <linux/blkdev.h>
 #include <linux/seq_file.h>
+#include <linux/ratelimit.h>
 #include "md.h"
 #include "raid1.h"
 #include "bitmap.h"
 
 #define DEBUG 0
-#if DEBUG
-#define PRINTK(x...) printk(x)
-#else
-#define PRINTK(x...)
-#endif
+#define PRINTK(x...) do { if (DEBUG) printk(x); } while (0)
 
 /*
  * Number of guaranteed r1bios in case of extreme VM load:
@@ -166,7 +163,7 @@ static void put_all_bios(conf_t *conf, r1bio_t *r1_bio)
 
        for (i = 0; i < conf->raid_disks; i++) {
                struct bio **bio = r1_bio->bios + i;
-               if (*bio && *bio != IO_BLOCKED)
+               if (!BIO_SPECIAL(*bio))
                        bio_put(*bio);
                *bio = NULL;
        }
@@ -176,12 +173,6 @@ static void free_r1bio(r1bio_t *r1_bio)
 {
        conf_t *conf = r1_bio->mddev->private;
 
-       /*
-        * Wake up any possible resync thread that waits for the device
-        * to go idle.
-        */
-       allow_barrier(conf);
-
        put_all_bios(conf, r1_bio);
        mempool_free(r1_bio, conf->r1bio_pool);
 }
@@ -222,6 +213,33 @@ static void reschedule_retry(r1bio_t *r1_bio)
  * operation and are ready to return a success/failure code to the buffer
  * cache layer.
  */
+/* Complete the master bio once every outstanding piece of it is done.
+ * bi_phys_segments is (re)used here as a count of outstanding pieces
+ * -- presumably initialised by the submission path, which is not
+ * visible here; TODO confirm.  A zero count means the bio was never
+ * split, so it completes immediately.
+ */
+static void call_bio_endio(r1bio_t *r1_bio)
+{
+       struct bio *bio = r1_bio->master_bio;
+       int done;
+       conf_t *conf = r1_bio->mddev->private;
+
+       if (bio->bi_phys_segments) {
+               unsigned long flags;
+               spin_lock_irqsave(&conf->device_lock, flags);
+               bio->bi_phys_segments--;
+               done = (bio->bi_phys_segments == 0);
+               spin_unlock_irqrestore(&conf->device_lock, flags);
+       } else
+               done = 1;
+
+       /* propagate failure to the master bio */
+       if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
+               clear_bit(BIO_UPTODATE, &bio->bi_flags);
+       if (done) {
+               bio_endio(bio, 0);
+               /*
+                * Wake up any possible resync thread that waits for the device
+                * to go idle.
+                */
+               allow_barrier(conf);
+       }
+}
+
 static void raid_end_bio_io(r1bio_t *r1_bio)
 {
        struct bio *bio = r1_bio->master_bio;
@@ -234,8 +252,7 @@ static void raid_end_bio_io(r1bio_t *r1_bio)
                        (unsigned long long) bio->bi_sector +
                                (bio->bi_size >> 9) - 1);
 
-               bio_endio(bio,
-                       test_bit(R1BIO_Uptodate, &r1_bio->state) ? 0 : -EIO);
+               call_bio_endio(r1_bio);
        }
        free_r1bio(r1_bio);
 }
@@ -287,36 +304,52 @@ static void raid1_end_read_request(struct bio *bio, int error)
                 * oops, read error:
                 */
                char b[BDEVNAME_SIZE];
-               if (printk_ratelimit())
-                       printk(KERN_ERR "md/raid1:%s: %s: rescheduling sector %llu\n",
-                              mdname(conf->mddev),
-                              bdevname(conf->mirrors[mirror].rdev->bdev,b), (unsigned long long)r1_bio->sector);
+               printk_ratelimited(
+                       KERN_ERR "md/raid1:%s: %s: "
+                       "rescheduling sector %llu\n",
+                       mdname(conf->mddev),
+                       bdevname(conf->mirrors[mirror].rdev->bdev,
+                                b),
+                       (unsigned long long)r1_bio->sector);
+               set_bit(R1BIO_ReadError, &r1_bio->state);
                reschedule_retry(r1_bio);
        }
 
        rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
 }
 
+/* Final accounting for a completed write request: release the extra
+ * behind-write page copies, update the write-intent bitmap, and tell
+ * md the write has ended.
+ */
+static void close_write(r1bio_t *r1_bio)
+{
+       /* it really is the end of this request */
+       if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
+               /* free extra copy of the data pages */
+               int i = r1_bio->behind_page_count;
+               while (i--)
+                       safe_put_page(r1_bio->behind_bvecs[i].bv_page);
+               kfree(r1_bio->behind_bvecs);
+               r1_bio->behind_bvecs = NULL;
+       }
+       /* clear the bitmap if all writes complete successfully */
+       bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
+                       r1_bio->sectors,
+                       !test_bit(R1BIO_Degraded, &r1_bio->state),
+                       test_bit(R1BIO_BehindIO, &r1_bio->state));
+       md_write_end(r1_bio->mddev);
+}
+
 static void r1_bio_write_done(r1bio_t *r1_bio)
 {
-       if (atomic_dec_and_test(&r1_bio->remaining))
-       {
-               /* it really is the end of this request */
-               if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
-                       /* free extra copy of the data pages */
-                       int i = r1_bio->behind_page_count;
-                       while (i--)
-                               safe_put_page(r1_bio->behind_pages[i]);
-                       kfree(r1_bio->behind_pages);
-                       r1_bio->behind_pages = NULL;
-               }
-               /* clear the bitmap if all writes complete successfully */
-               bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
-                               r1_bio->sectors,
-                               !test_bit(R1BIO_Degraded, &r1_bio->state),
-                               test_bit(R1BIO_BehindIO, &r1_bio->state));
-               md_write_end(r1_bio->mddev);
-               raid_end_bio_io(r1_bio);
+       if (!atomic_dec_and_test(&r1_bio->remaining))
+               return;
+
+       if (test_bit(R1BIO_WriteError, &r1_bio->state))
+               reschedule_retry(r1_bio);
+       else {
+               close_write(r1_bio);
+               if (test_bit(R1BIO_MadeGood, &r1_bio->state))
+                       reschedule_retry(r1_bio);
+               else
+                       raid_end_bio_io(r1_bio);
        }
 }
 
@@ -336,13 +369,11 @@ static void raid1_end_write_request(struct bio *bio, int error)
        /*
         * 'one mirror IO has finished' event handler:
         */
-       r1_bio->bios[mirror] = NULL;
-       to_put = bio;
        if (!uptodate) {
-               md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
-               /* an I/O failed, we can't clear the bitmap */
-               set_bit(R1BIO_Degraded, &r1_bio->state);
-       } else
+               set_bit(WriteErrorSeen,
+                       &conf->mirrors[mirror].rdev->flags);
+               set_bit(R1BIO_WriteError, &r1_bio->state);
+       } else {
                /*
                 * Set R1BIO_Uptodate in our master bio, so that we
                 * will return a good error code for to the higher
@@ -353,8 +384,22 @@ static void raid1_end_write_request(struct bio *bio, int error)
                 * to user-side. So if something waits for IO, then it
                 * will wait for the 'master' bio.
                 */
+               sector_t first_bad;
+               int bad_sectors;
+
+               r1_bio->bios[mirror] = NULL;
+               to_put = bio;
                set_bit(R1BIO_Uptodate, &r1_bio->state);
 
+               /* Maybe we can clear some bad blocks. */
+               if (is_badblock(conf->mirrors[mirror].rdev,
+                               r1_bio->sector, r1_bio->sectors,
+                               &first_bad, &bad_sectors)) {
+                       r1_bio->bios[mirror] = IO_MADE_GOOD;
+                       set_bit(R1BIO_MadeGood, &r1_bio->state);
+               }
+       }
+
        update_head_pos(mirror, r1_bio);
 
        if (behind) {
@@ -377,11 +422,13 @@ static void raid1_end_write_request(struct bio *bio, int error)
                                       (unsigned long long) mbio->bi_sector,
                                       (unsigned long long) mbio->bi_sector +
                                       (mbio->bi_size >> 9) - 1);
-                               bio_endio(mbio, 0);
+                               call_bio_endio(r1_bio);
                        }
                }
        }
-       rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
+       if (r1_bio->bios[mirror] == NULL)
+               rdev_dec_pending(conf->mirrors[mirror].rdev,
+                                conf->mddev);
 
        /*
         * Let's see if all mirrored write operations have finished
@@ -408,10 +455,11 @@ static void raid1_end_write_request(struct bio *bio, int error)
  *
  * The rdev for the device selected will have nr_pending incremented.
  */
-static int read_balance(conf_t *conf, r1bio_t *r1_bio)
+static int read_balance(conf_t *conf, r1bio_t *r1_bio, int *max_sectors)
 {
        const sector_t this_sector = r1_bio->sector;
-       const int sectors = r1_bio->sectors;
+       int sectors;
+       int best_good_sectors;
        int start_disk;
        int best_disk;
        int i;
@@ -426,8 +474,11 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
         * We take the first readable disk when above the resync window.
         */
  retry:
+       sectors = r1_bio->sectors;
        best_disk = -1;
        best_dist = MaxSector;
+       best_good_sectors = 0;
+
        if (conf->mddev->recovery_cp < MaxSector &&
            (this_sector + sectors >= conf->next_resync)) {
                choose_first = 1;
@@ -439,6 +490,9 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 
        for (i = 0 ; i < conf->raid_disks ; i++) {
                sector_t dist;
+               sector_t first_bad;
+               int bad_sectors;
+
                int disk = start_disk + i;
                if (disk >= conf->raid_disks)
                        disk -= conf->raid_disks;
@@ -461,6 +515,35 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
                /* This is a reasonable device to use.  It might
                 * even be best.
                 */
+               if (is_badblock(rdev, this_sector, sectors,
+                               &first_bad, &bad_sectors)) {
+                       if (best_dist < MaxSector)
+                               /* already have a better device */
+                               continue;
+                       if (first_bad <= this_sector) {
+                               /* cannot read here. If this is the 'primary'
+                                * device, then we must not read beyond
+                                * bad_sectors from another device..
+                                */
+                               bad_sectors -= (this_sector - first_bad);
+                               if (choose_first && sectors > bad_sectors)
+                                       sectors = bad_sectors;
+                               if (best_good_sectors > sectors)
+                                       best_good_sectors = sectors;
+
+                       } else {
+                               sector_t good_sectors = first_bad - this_sector;
+                               if (good_sectors > best_good_sectors) {
+                                       best_good_sectors = good_sectors;
+                                       best_disk = disk;
+                               }
+                               if (choose_first)
+                                       break;
+                       }
+                       continue;
+               } else
+                       best_good_sectors = sectors;
+
                dist = abs(this_sector - conf->mirrors[disk].head_position);
                if (choose_first
                    /* Don't change to another disk for sequential reads */
@@ -489,10 +572,12 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
                        rdev_dec_pending(rdev, conf->mddev);
                        goto retry;
                }
+               sectors = best_good_sectors;
                conf->next_seq_sect = this_sector + sectors;
                conf->last_used = best_disk;
        }
        rcu_read_unlock();
+       *max_sectors = sectors;
 
        return best_disk;
 }
@@ -672,30 +757,31 @@ static void alloc_behind_pages(struct bio *bio, r1bio_t *r1_bio)
 {
        int i;
        struct bio_vec *bvec;
-       struct page **pages = kzalloc(bio->bi_vcnt * sizeof(struct page*),
+       struct bio_vec *bvecs = kzalloc(bio->bi_vcnt * sizeof(struct bio_vec),
                                        GFP_NOIO);
-       if (unlikely(!pages))
+       if (unlikely(!bvecs))
                return;
 
        bio_for_each_segment(bvec, bio, i) {
-               pages[i] = alloc_page(GFP_NOIO);
-               if (unlikely(!pages[i]))
+               bvecs[i] = *bvec;
+               bvecs[i].bv_page = alloc_page(GFP_NOIO);
+               if (unlikely(!bvecs[i].bv_page))
                        goto do_sync_io;
-               memcpy(kmap(pages[i]) + bvec->bv_offset,
-                       kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len);
-               kunmap(pages[i]);
+               memcpy(kmap(bvecs[i].bv_page) + bvec->bv_offset,
+                      kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len);
+               kunmap(bvecs[i].bv_page);
                kunmap(bvec->bv_page);
        }
-       r1_bio->behind_pages = pages;
+       r1_bio->behind_bvecs = bvecs;
        r1_bio->behind_page_count = bio->bi_vcnt;
        set_bit(R1BIO_BehindIO, &r1_bio->state);
        return;
 
 do_sync_io:
        for (i = 0; i < bio->bi_vcnt; i++)
-               if (pages[i])
-                       put_page(pages[i]);
-       kfree(pages);
+               if (bvecs[i].bv_page)
+                       put_page(bvecs[i].bv_page);
+       kfree(bvecs);
        PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
 }
 
@@ -705,7 +791,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
        mirror_info_t *mirror;
        r1bio_t *r1_bio;
        struct bio *read_bio;
-       int i, targets = 0, disks;
+       int i, disks;
        struct bitmap *bitmap;
        unsigned long flags;
        const int rw = bio_data_dir(bio);
@@ -713,6 +799,9 @@ static int make_request(mddev_t *mddev, struct bio * bio)
        const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
        mdk_rdev_t *blocked_rdev;
        int plugged;
+       int first_clone;
+       int sectors_handled;
+       int max_sectors;
 
        /*
         * Register the new request and wait if the reconstruction
@@ -759,11 +848,24 @@ static int make_request(mddev_t *mddev, struct bio * bio)
        r1_bio->mddev = mddev;
        r1_bio->sector = bio->bi_sector;
 
+       /* We might need to issue multiple reads to different
+        * devices if there are bad blocks around, so we keep
+        * track of the number of reads in bio->bi_phys_segments.
+        * If this is 0, there is only one r1_bio and no locking
+        * will be needed when requests complete.  If it is
+        * non-zero, then it is the number of not-completed requests.
+        */
+       bio->bi_phys_segments = 0;
+       clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+
        if (rw == READ) {
                /*
                 * read balancing logic:
                 */
-               int rdisk = read_balance(conf, r1_bio);
+               int rdisk;
+
+read_again:
+               rdisk = read_balance(conf, r1_bio, &max_sectors);
 
                if (rdisk < 0) {
                        /* couldn't find anywhere to read from */
@@ -784,6 +886,8 @@ static int make_request(mddev_t *mddev, struct bio * bio)
                r1_bio->read_disk = rdisk;
 
                read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+               md_trim_bio(read_bio, r1_bio->sector - bio->bi_sector,
+                           max_sectors);
 
                r1_bio->bios[rdisk] = read_bio;
 
@@ -793,16 +897,52 @@ static int make_request(mddev_t *mddev, struct bio * bio)
                read_bio->bi_rw = READ | do_sync;
                read_bio->bi_private = r1_bio;
 
-               generic_make_request(read_bio);
+               if (max_sectors < r1_bio->sectors) {
+                       /* could not read all from this device, so we will
+                        * need another r1_bio.
+                        */
+
+                       sectors_handled = (r1_bio->sector + max_sectors
+                                          - bio->bi_sector);
+                       r1_bio->sectors = max_sectors;
+                       spin_lock_irq(&conf->device_lock);
+                       if (bio->bi_phys_segments == 0)
+                               bio->bi_phys_segments = 2;
+                       else
+                               bio->bi_phys_segments++;
+                       spin_unlock_irq(&conf->device_lock);
+                       /* Cannot call generic_make_request directly
+                        * as that will be queued in __make_request
+                        * and subsequent mempool_alloc might block waiting
+                        * for it.  So hand bio over to raid1d.
+                        */
+                       reschedule_retry(r1_bio);
+
+                       r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+
+                       r1_bio->master_bio = bio;
+                       r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
+                       r1_bio->state = 0;
+                       r1_bio->mddev = mddev;
+                       r1_bio->sector = bio->bi_sector + sectors_handled;
+                       goto read_again;
+               } else
+                       generic_make_request(read_bio);
                return 0;
        }
 
        /*
         * WRITE:
         */
-       /* first select target devices under spinlock and
+       /* first select target devices under rcu_lock and
         * inc refcount on their rdev.  Record them by setting
         * bios[x] to bio
+        * If there are known/acknowledged bad blocks on any device on
+        * which we have seen a write error, we want to avoid writing those
+        * blocks.
+        * This potentially requires several writes to write around
+        * the bad blocks.  Each set of writes gets its own r1bio
+        * with a set of bios attached.
         */
        plugged = mddev_check_plugged(mddev);
 
@@ -810,6 +950,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
  retry_write:
        blocked_rdev = NULL;
        rcu_read_lock();
+       max_sectors = r1_bio->sectors;
        for (i = 0;  i < disks; i++) {
                mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
                if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
@@ -817,17 +958,56 @@ static int make_request(mddev_t *mddev, struct bio * bio)
                        blocked_rdev = rdev;
                        break;
                }
-               if (rdev && !test_bit(Faulty, &rdev->flags)) {
-                       atomic_inc(&rdev->nr_pending);
-                       if (test_bit(Faulty, &rdev->flags)) {
+               r1_bio->bios[i] = NULL;
+               if (!rdev || test_bit(Faulty, &rdev->flags)) {
+                       set_bit(R1BIO_Degraded, &r1_bio->state);
+                       continue;
+               }
+
+               atomic_inc(&rdev->nr_pending);
+               if (test_bit(WriteErrorSeen, &rdev->flags)) {
+                       sector_t first_bad;
+                       int bad_sectors;
+                       int is_bad;
+
+                       is_bad = is_badblock(rdev, r1_bio->sector,
+                                            max_sectors,
+                                            &first_bad, &bad_sectors);
+                       if (is_bad < 0) {
+                               /* mustn't write here until the bad block is
+                                * acknowledged */
+                               set_bit(BlockedBadBlocks, &rdev->flags);
+                               blocked_rdev = rdev;
+                               break;
+                       }
+                       if (is_bad && first_bad <= r1_bio->sector) {
+                               /* Cannot write here at all */
+                               bad_sectors -= (r1_bio->sector - first_bad);
+                               if (bad_sectors < max_sectors)
+                                       /* mustn't write more than bad_sectors
+                                        * to other devices yet
+                                        */
+                                       max_sectors = bad_sectors;
                                rdev_dec_pending(rdev, mddev);
-                               r1_bio->bios[i] = NULL;
-                       } else {
-                               r1_bio->bios[i] = bio;
-                               targets++;
+                               /* We don't set R1BIO_Degraded as that
+                                * only applies if the disk is
+                                * missing, so it might be re-added,
+                                * and we want to know to recover this
+                                * chunk.
+                                * In this case the device is here,
+                                * and the fact that this chunk is not
+                                * in-sync is recorded in the bad
+                                * block log
+                                */
+                               continue;
                        }
-               } else
-                       r1_bio->bios[i] = NULL;
+                       if (is_bad) {
+                               int good_sectors = first_bad - r1_bio->sector;
+                               if (good_sectors < max_sectors)
+                                       max_sectors = good_sectors;
+                       }
+               }
+               r1_bio->bios[i] = bio;
        }
        rcu_read_unlock();
 
@@ -838,51 +1018,57 @@ static int make_request(mddev_t *mddev, struct bio * bio)
                for (j = 0; j < i; j++)
                        if (r1_bio->bios[j])
                                rdev_dec_pending(conf->mirrors[j].rdev, mddev);
-
+               r1_bio->state = 0;
                allow_barrier(conf);
                md_wait_for_blocked_rdev(blocked_rdev, mddev);
                wait_barrier(conf);
                goto retry_write;
        }
 
-       BUG_ON(targets == 0); /* we never fail the last device */
-
-       if (targets < conf->raid_disks) {
-               /* array is degraded, we will not clear the bitmap
-                * on I/O completion (see raid1_end_write_request) */
-               set_bit(R1BIO_Degraded, &r1_bio->state);
+       if (max_sectors < r1_bio->sectors) {
+               /* We are splitting this write into multiple parts, so
+                * we need to prepare for allocating another r1_bio.
+                */
+               r1_bio->sectors = max_sectors;
+               spin_lock_irq(&conf->device_lock);
+               if (bio->bi_phys_segments == 0)
+                       bio->bi_phys_segments = 2;
+               else
+                       bio->bi_phys_segments++;
+               spin_unlock_irq(&conf->device_lock);
        }
-
-       /* do behind I/O ?
-        * Not if there are too many, or cannot allocate memory,
-        * or a reader on WriteMostly is waiting for behind writes 
-        * to flush */
-       if (bitmap &&
-           (atomic_read(&bitmap->behind_writes)
-            < mddev->bitmap_info.max_write_behind) &&
-           !waitqueue_active(&bitmap->behind_wait))
-               alloc_behind_pages(bio, r1_bio);
+       sectors_handled = r1_bio->sector + max_sectors - bio->bi_sector;
 
        atomic_set(&r1_bio->remaining, 1);
        atomic_set(&r1_bio->behind_remaining, 0);
 
-       bitmap_startwrite(bitmap, bio->bi_sector, r1_bio->sectors,
-                               test_bit(R1BIO_BehindIO, &r1_bio->state));
+       first_clone = 1;
        for (i = 0; i < disks; i++) {
                struct bio *mbio;
                if (!r1_bio->bios[i])
                        continue;
 
                mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-               r1_bio->bios[i] = mbio;
-
-               mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset;
-               mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
-               mbio->bi_end_io = raid1_end_write_request;
-               mbio->bi_rw = WRITE | do_flush_fua | do_sync;
-               mbio->bi_private = r1_bio;
-
-               if (r1_bio->behind_pages) {
+               md_trim_bio(mbio, r1_bio->sector - bio->bi_sector, max_sectors);
+
+               if (first_clone) {
+                       /* do behind I/O ?
+                        * Not if there are too many, or cannot
+                        * allocate memory, or a reader on WriteMostly
+                        * is waiting for behind writes to flush */
+                       if (bitmap &&
+                           (atomic_read(&bitmap->behind_writes)
+                            < mddev->bitmap_info.max_write_behind) &&
+                           !waitqueue_active(&bitmap->behind_wait))
+                               alloc_behind_pages(mbio, r1_bio);
+
+                       bitmap_startwrite(bitmap, r1_bio->sector,
+                                         r1_bio->sectors,
+                                         test_bit(R1BIO_BehindIO,
+                                                  &r1_bio->state));
+                       first_clone = 0;
+               }
+               if (r1_bio->behind_bvecs) {
                        struct bio_vec *bvec;
                        int j;
 
@@ -894,11 +1080,20 @@ static int make_request(mddev_t *mddev, struct bio * bio)
                         * them all
                         */
                        __bio_for_each_segment(bvec, mbio, j, 0)
-                               bvec->bv_page = r1_bio->behind_pages[j];
+                               bvec->bv_page = r1_bio->behind_bvecs[j].bv_page;
                        if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
                                atomic_inc(&r1_bio->behind_remaining);
                }
 
+               r1_bio->bios[i] = mbio;
+
+               mbio->bi_sector = (r1_bio->sector +
+                                  conf->mirrors[i].rdev->data_offset);
+               mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
+               mbio->bi_end_io = raid1_end_write_request;
+               mbio->bi_rw = WRITE | do_flush_fua | do_sync;
+               mbio->bi_private = r1_bio;
+
                atomic_inc(&r1_bio->remaining);
                spin_lock_irqsave(&conf->device_lock, flags);
                bio_list_add(&conf->pending_bio_list, mbio);
@@ -909,6 +1104,19 @@ static int make_request(mddev_t *mddev, struct bio * bio)
        /* In case raid1d snuck in to freeze_array */
        wake_up(&conf->wait_barrier);
 
+       if (sectors_handled < (bio->bi_size >> 9)) {
+               /* We need another r1_bio.  It has already been counted
+                * in bio->bi_phys_segments
+                */
+               r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+               r1_bio->master_bio = bio;
+               r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
+               r1_bio->state = 0;
+               r1_bio->mddev = mddev;
+               r1_bio->sector = bio->bi_sector + sectors_handled;
+               goto retry_write;
+       }
+
        if (do_sync || !bitmap || !plugged)
                md_wakeup_thread(mddev->thread);
 
@@ -952,9 +1160,10 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
                 * However don't try a recovery from this drive as
                 * it is very likely to fail.
                 */
-               mddev->recovery_disabled = 1;
+               conf->recovery_disabled = mddev->recovery_disabled;
                return;
        }
+       set_bit(Blocked, &rdev->flags);
        if (test_and_clear_bit(In_sync, &rdev->flags)) {
                unsigned long flags;
                spin_lock_irqsave(&conf->device_lock, flags);
@@ -1027,7 +1236,7 @@ static int raid1_spare_active(mddev_t *mddev)
                    && !test_bit(Faulty, &rdev->flags)
                    && !test_and_set_bit(In_sync, &rdev->flags)) {
                        count++;
-                       sysfs_notify_dirent(rdev->sysfs_state);
+                       sysfs_notify_dirent_safe(rdev->sysfs_state);
                }
        }
        spin_lock_irqsave(&conf->device_lock, flags);
@@ -1048,6 +1257,9 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
        int first = 0;
        int last = mddev->raid_disks - 1;
 
+       if (mddev->recovery_disabled == conf->recovery_disabled)
+               return -EBUSY;
+
        if (rdev->raid_disk >= 0)
                first = last = rdev->raid_disk;
 
@@ -1103,7 +1315,7 @@ static int raid1_remove_disk(mddev_t *mddev, int number)
                 * is not possible.
                 */
                if (!test_bit(Faulty, &rdev->flags) &&
-                   !mddev->recovery_disabled &&
+                   mddev->recovery_disabled != conf->recovery_disabled &&
                    mddev->degraded < conf->raid_disks) {
                        err = -EBUSY;
                        goto abort;
@@ -1155,6 +1367,8 @@ static void end_sync_write(struct bio *bio, int error)
        conf_t *conf = mddev->private;
        int i;
        int mirror=0;
+       sector_t first_bad;
+       int bad_sectors;
 
        for (i = 0; i < conf->raid_disks; i++)
                if (r1_bio->bios[i] == bio) {
@@ -1172,18 +1386,48 @@ static void end_sync_write(struct bio *bio, int error)
                        s += sync_blocks;
                        sectors_to_go -= sync_blocks;
                } while (sectors_to_go > 0);
-               md_error(mddev, conf->mirrors[mirror].rdev);
-       }
+               set_bit(WriteErrorSeen,
+                       &conf->mirrors[mirror].rdev->flags);
+               set_bit(R1BIO_WriteError, &r1_bio->state);
+       } else if (is_badblock(conf->mirrors[mirror].rdev,
+                              r1_bio->sector,
+                              r1_bio->sectors,
+                              &first_bad, &bad_sectors) &&
+                  !is_badblock(conf->mirrors[r1_bio->read_disk].rdev,
+                               r1_bio->sector,
+                               r1_bio->sectors,
+                               &first_bad, &bad_sectors)
+               )
+               set_bit(R1BIO_MadeGood, &r1_bio->state);
 
        update_head_pos(mirror, r1_bio);
 
        if (atomic_dec_and_test(&r1_bio->remaining)) {
-               sector_t s = r1_bio->sectors;
-               put_buf(r1_bio);
-               md_done_sync(mddev, s, uptodate);
+               int s = r1_bio->sectors;
+               if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
+                   test_bit(R1BIO_WriteError, &r1_bio->state))
+                       reschedule_retry(r1_bio);
+               else {
+                       put_buf(r1_bio);
+                       md_done_sync(mddev, s, uptodate);
+               }
        }
 }
 
+static int r1_sync_page_io(mdk_rdev_t *rdev, sector_t sector,
+                           int sectors, struct page *page, int rw)
+{
+       if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
+               /* success */
+               return 1;
+       if (rw == WRITE)
+               set_bit(WriteErrorSeen, &rdev->flags);
+       /* need to record an error - either for the block or the device */
+       if (!rdev_set_badblocks(rdev, sector, sectors, 0))
+               md_error(rdev->mddev, rdev);
+       return 0;
+}
+
 static int fix_sync_read_error(r1bio_t *r1_bio)
 {
        /* Try some synchronous reads of other devices to get
@@ -1193,6 +1437,9 @@ static int fix_sync_read_error(r1bio_t *r1_bio)
         * We don't need to freeze the array, because being in an
         * active sync request, there is no normal IO, and
         * no overlapping syncs.
+        * We don't need to check is_badblock() again as we
+        * made sure that anything with a bad block in range
+        * will have bi_end_io clear.
         */
        mddev_t *mddev = r1_bio->mddev;
        conf_t *conf = mddev->private;
@@ -1217,9 +1464,7 @@ static int fix_sync_read_error(r1bio_t *r1_bio)
                                 * active, and resync is currently active
                                 */
                                rdev = conf->mirrors[d].rdev;
-                               if (sync_page_io(rdev,
-                                                sect,
-                                                s<<9,
+                               if (sync_page_io(rdev, sect, s<<9,
                                                 bio->bi_io_vec[idx].bv_page,
                                                 READ, false)) {
                                        success = 1;
@@ -1233,16 +1478,36 @@ static int fix_sync_read_error(r1bio_t *r1_bio)
 
                if (!success) {
                        char b[BDEVNAME_SIZE];
-                       /* Cannot read from anywhere, array is toast */
-                       md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
+                       int abort = 0;
+                       /* Cannot read from anywhere, this block is lost.
+                        * Record a bad block on each device.  If that doesn't
+                        * work just disable and interrupt the recovery.
+                        * Don't fail devices as that won't really help.
+                        */
                        printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error"
                               " for block %llu\n",
                               mdname(mddev),
                               bdevname(bio->bi_bdev, b),
                               (unsigned long long)r1_bio->sector);
-                       md_done_sync(mddev, r1_bio->sectors, 0);
-                       put_buf(r1_bio);
-                       return 0;
+                       for (d = 0; d < conf->raid_disks; d++) {
+                               rdev = conf->mirrors[d].rdev;
+                               if (!rdev || test_bit(Faulty, &rdev->flags))
+                                       continue;
+                               if (!rdev_set_badblocks(rdev, sect, s, 0))
+                                       abort = 1;
+                       }
+                       if (abort) {
+                               mddev->recovery_disabled = 1;
+                               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+                               md_done_sync(mddev, r1_bio->sectors, 0);
+                               put_buf(r1_bio);
+                               return 0;
+                       }
+                       /* Try next page */
+                       sectors -= s;
+                       sect += s;
+                       idx++;
+                       continue;
                }
 
                start = d;
@@ -1254,16 +1519,12 @@ static int fix_sync_read_error(r1bio_t *r1_bio)
                        if (r1_bio->bios[d]->bi_end_io != end_sync_read)
                                continue;
                        rdev = conf->mirrors[d].rdev;
-                       if (sync_page_io(rdev,
-                                        sect,
-                                        s<<9,
-                                        bio->bi_io_vec[idx].bv_page,
-                                        WRITE, false) == 0) {
+                       if (r1_sync_page_io(rdev, sect, s,
+                                           bio->bi_io_vec[idx].bv_page,
+                                           WRITE) == 0) {
                                r1_bio->bios[d]->bi_end_io = NULL;
                                rdev_dec_pending(rdev, mddev);
-                               md_error(mddev, rdev);
-                       } else
-                               atomic_add(s, &rdev->corrected_errors);
+                       }
                }
                d = start;
                while (d != r1_bio->read_disk) {
@@ -1273,12 +1534,10 @@ static int fix_sync_read_error(r1bio_t *r1_bio)
                        if (r1_bio->bios[d]->bi_end_io != end_sync_read)
                                continue;
                        rdev = conf->mirrors[d].rdev;
-                       if (sync_page_io(rdev,
-                                        sect,
-                                        s<<9,
-                                        bio->bi_io_vec[idx].bv_page,
-                                        READ, false) == 0)
-                               md_error(mddev, rdev);
+                       if (r1_sync_page_io(rdev, sect, s,
+                                           bio->bi_io_vec[idx].bv_page,
+                                           READ) != 0)
+                               atomic_add(s, &rdev->corrected_errors);
                }
                sectors -= s;
                sect += s;
@@ -1420,7 +1679,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
  *
  *     1.      Retries failed read operations on working mirrors.
  *     2.      Updates the raid superblock when problems encounter.
- *     3.      Performs writes following reads for array syncronising.
+ *     3.      Performs writes following reads for array synchronising.
  */
 
 static void fix_read_error(conf_t *conf, int read_disk,
@@ -1443,9 +1702,14 @@ static void fix_read_error(conf_t *conf, int read_disk,
                         * which is the thread that might remove
                         * a device.  If raid1d ever becomes multi-threaded....
                         */
+                       sector_t first_bad;
+                       int bad_sectors;
+
                        rdev = conf->mirrors[d].rdev;
                        if (rdev &&
                            test_bit(In_sync, &rdev->flags) &&
+                           is_badblock(rdev, sect, s,
+                                       &first_bad, &bad_sectors) == 0 &&
                            sync_page_io(rdev, sect, s<<9,
                                         conf->tmppage, READ, false))
                                success = 1;
@@ -1457,8 +1721,10 @@ static void fix_read_error(conf_t *conf, int read_disk,
                } while (!success && d != read_disk);
 
                if (!success) {
-                       /* Cannot read from anywhere -- bye bye array */
-                       md_error(mddev, conf->mirrors[read_disk].rdev);
+                       /* Cannot read from anywhere - mark it bad */
+                       mdk_rdev_t *rdev = conf->mirrors[read_disk].rdev;
+                       if (!rdev_set_badblocks(rdev, sect, s, 0))
+                               md_error(mddev, rdev);
                        break;
                }
                /* write it back and re-read */
@@ -1469,13 +1735,9 @@ static void fix_read_error(conf_t *conf, int read_disk,
                        d--;
                        rdev = conf->mirrors[d].rdev;
                        if (rdev &&
-                           test_bit(In_sync, &rdev->flags)) {
-                               if (sync_page_io(rdev, sect, s<<9,
-                                                conf->tmppage, WRITE, false)
-                                   == 0)
-                                       /* Well, this device is dead */
-                                       md_error(mddev, rdev);
-                       }
+                           test_bit(In_sync, &rdev->flags))
+                               r1_sync_page_io(rdev, sect, s,
+                                               conf->tmppage, WRITE);
                }
                d = start;
                while (d != read_disk) {
@@ -1486,12 +1748,8 @@ static void fix_read_error(conf_t *conf, int read_disk,
                        rdev = conf->mirrors[d].rdev;
                        if (rdev &&
                            test_bit(In_sync, &rdev->flags)) {
-                               if (sync_page_io(rdev, sect, s<<9,
-                                                conf->tmppage, READ, false)
-                                   == 0)
-                                       /* Well, this device is dead */
-                                       md_error(mddev, rdev);
-                               else {
+                               if (r1_sync_page_io(rdev, sect, s,
+                                                   conf->tmppage, READ)) {
                                        atomic_add(s, &rdev->corrected_errors);
                                        printk(KERN_INFO
                                               "md/raid1:%s: read error corrected "
@@ -1508,21 +1766,255 @@ static void fix_read_error(conf_t *conf, int read_disk,
        }
 }
 
+static void bi_complete(struct bio *bio, int error)
+{
+       complete((struct completion *)bio->bi_private);
+}
+
+static int submit_bio_wait(int rw, struct bio *bio)
+{
+       struct completion event;
+       rw |= REQ_SYNC;
+
+       init_completion(&event);
+       bio->bi_private = &event;
+       bio->bi_end_io = bi_complete;
+       submit_bio(rw, bio);
+       wait_for_completion(&event);
+
+       return test_bit(BIO_UPTODATE, &bio->bi_flags);
+}
+
+static int narrow_write_error(r1bio_t *r1_bio, int i)
+{
+       mddev_t *mddev = r1_bio->mddev;
+       conf_t *conf = mddev->private;
+       mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+       int vcnt, idx;
+       struct bio_vec *vec;
+
+       /* bio has the data to be written to device 'i' where
+        * we just recently had a write error.
+        * We repeatedly clone the bio and trim down to one block,
+        * then try the write.  Where the write fails we record
+        * a bad block.
+        * It is conceivable that the bio doesn't exactly align with
+        * blocks.  We must handle this somehow.
+        *
+        * We currently own a reference on the rdev.
+        */
+
+       int block_sectors;
+       sector_t sector;
+       int sectors;
+       int sect_to_write = r1_bio->sectors;
+       int ok = 1;
+
+       if (rdev->badblocks.shift < 0)
+               return 0;
+
+       block_sectors = 1 << rdev->badblocks.shift;
+       sector = r1_bio->sector;
+       sectors = ((sector + block_sectors)
+                  & ~(sector_t)(block_sectors - 1))
+               - sector;
+
+       if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
+               vcnt = r1_bio->behind_page_count;
+               vec = r1_bio->behind_bvecs;
+               idx = 0;
+               while (vec[idx].bv_page == NULL)
+                       idx++;
+       } else {
+               vcnt = r1_bio->master_bio->bi_vcnt;
+               vec = r1_bio->master_bio->bi_io_vec;
+               idx = r1_bio->master_bio->bi_idx;
+       }
+       while (sect_to_write) {
+               struct bio *wbio;
+               if (sectors > sect_to_write)
+                       sectors = sect_to_write;
+               /* Write at 'sector' for 'sectors'*/
+
+               wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev);
+               memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec));
+               wbio->bi_sector = r1_bio->sector;
+               wbio->bi_rw = WRITE;
+               wbio->bi_vcnt = vcnt;
+               wbio->bi_size = r1_bio->sectors << 9;
+               wbio->bi_idx = idx;
+
+               md_trim_bio(wbio, sector - r1_bio->sector, sectors);
+               wbio->bi_sector += rdev->data_offset;
+               wbio->bi_bdev = rdev->bdev;
+               if (submit_bio_wait(WRITE, wbio) == 0)
+                       /* failure! */
+                       ok = rdev_set_badblocks(rdev, sector,
+                                               sectors, 0)
+                               && ok;
+
+               bio_put(wbio);
+               sect_to_write -= sectors;
+               sector += sectors;
+               sectors = block_sectors;
+       }
+       return ok;
+}
+
+static void handle_sync_write_finished(conf_t *conf, r1bio_t *r1_bio)
+{
+       int m;
+       int s = r1_bio->sectors;
+       for (m = 0; m < conf->raid_disks ; m++) {
+               mdk_rdev_t *rdev = conf->mirrors[m].rdev;
+               struct bio *bio = r1_bio->bios[m];
+               if (bio->bi_end_io == NULL)
+                       continue;
+               if (test_bit(BIO_UPTODATE, &bio->bi_flags) &&
+                   test_bit(R1BIO_MadeGood, &r1_bio->state)) {
+                       rdev_clear_badblocks(rdev, r1_bio->sector, s);
+               }
+               if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
+                   test_bit(R1BIO_WriteError, &r1_bio->state)) {
+                       if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0))
+                               md_error(conf->mddev, rdev);
+               }
+       }
+       put_buf(r1_bio);
+       md_done_sync(conf->mddev, s, 1);
+}
+
+static void handle_write_finished(conf_t *conf, r1bio_t *r1_bio)
+{
+       int m;
+       for (m = 0; m < conf->raid_disks ; m++)
+               if (r1_bio->bios[m] == IO_MADE_GOOD) {
+                       mdk_rdev_t *rdev = conf->mirrors[m].rdev;
+                       rdev_clear_badblocks(rdev,
+                                            r1_bio->sector,
+                                            r1_bio->sectors);
+                       rdev_dec_pending(rdev, conf->mddev);
+               } else if (r1_bio->bios[m] != NULL) {
+                       /* This drive got a write error.  We need to
+                        * narrow down and record precise write
+                        * errors.
+                        */
+                       if (!narrow_write_error(r1_bio, m)) {
+                               md_error(conf->mddev,
+                                        conf->mirrors[m].rdev);
+                               /* an I/O failed, we can't clear the bitmap */
+                               set_bit(R1BIO_Degraded, &r1_bio->state);
+                       }
+                       rdev_dec_pending(conf->mirrors[m].rdev,
+                                        conf->mddev);
+               }
+       if (test_bit(R1BIO_WriteError, &r1_bio->state))
+               close_write(r1_bio);
+       raid_end_bio_io(r1_bio);
+}
+
+static void handle_read_error(conf_t *conf, r1bio_t *r1_bio)
+{
+       int disk;
+       int max_sectors;
+       mddev_t *mddev = conf->mddev;
+       struct bio *bio;
+       char b[BDEVNAME_SIZE];
+       mdk_rdev_t *rdev;
+
+       clear_bit(R1BIO_ReadError, &r1_bio->state);
+       /* we got a read error. Maybe the drive is bad.  Maybe just
+        * the block and we can fix it.
+        * We freeze all other IO, and try reading the block from
+        * other devices.  When we find one, we re-write
+        * and check it that fixes the read error.
+        * This is all done synchronously while the array is
+        * frozen
+        */
+       if (mddev->ro == 0) {
+               freeze_array(conf);
+               fix_read_error(conf, r1_bio->read_disk,
+                              r1_bio->sector, r1_bio->sectors);
+               unfreeze_array(conf);
+       } else
+               md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
+
+       bio = r1_bio->bios[r1_bio->read_disk];
+       bdevname(bio->bi_bdev, b);
+read_more:
+       disk = read_balance(conf, r1_bio, &max_sectors);
+       if (disk == -1) {
+               printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O"
+                      " read error for block %llu\n",
+                      mdname(mddev), b, (unsigned long long)r1_bio->sector);
+               raid_end_bio_io(r1_bio);
+       } else {
+               const unsigned long do_sync
+                       = r1_bio->master_bio->bi_rw & REQ_SYNC;
+               if (bio) {
+                       r1_bio->bios[r1_bio->read_disk] =
+                               mddev->ro ? IO_BLOCKED : NULL;
+                       bio_put(bio);
+               }
+               r1_bio->read_disk = disk;
+               bio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev);
+               md_trim_bio(bio, r1_bio->sector - bio->bi_sector, max_sectors);
+               r1_bio->bios[r1_bio->read_disk] = bio;
+               rdev = conf->mirrors[disk].rdev;
+               printk_ratelimited(KERN_ERR
+                                  "md/raid1:%s: redirecting sector %llu"
+                                  " to other mirror: %s\n",
+                                  mdname(mddev),
+                                  (unsigned long long)r1_bio->sector,
+                                  bdevname(rdev->bdev, b));
+               bio->bi_sector = r1_bio->sector + rdev->data_offset;
+               bio->bi_bdev = rdev->bdev;
+               bio->bi_end_io = raid1_end_read_request;
+               bio->bi_rw = READ | do_sync;
+               bio->bi_private = r1_bio;
+               if (max_sectors < r1_bio->sectors) {
+                       /* Drat - have to split this up more */
+                       struct bio *mbio = r1_bio->master_bio;
+                       int sectors_handled = (r1_bio->sector + max_sectors
+                                              - mbio->bi_sector);
+                       r1_bio->sectors = max_sectors;
+                       spin_lock_irq(&conf->device_lock);
+                       if (mbio->bi_phys_segments == 0)
+                               mbio->bi_phys_segments = 2;
+                       else
+                               mbio->bi_phys_segments++;
+                       spin_unlock_irq(&conf->device_lock);
+                       generic_make_request(bio);
+                       bio = NULL;
+
+                       r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+
+                       r1_bio->master_bio = mbio;
+                       r1_bio->sectors = (mbio->bi_size >> 9)
+                                         - sectors_handled;
+                       r1_bio->state = 0;
+                       set_bit(R1BIO_ReadError, &r1_bio->state);
+                       r1_bio->mddev = mddev;
+                       r1_bio->sector = mbio->bi_sector + sectors_handled;
+
+                       goto read_more;
+               } else
+                       generic_make_request(bio);
+       }
+}
+
 static void raid1d(mddev_t *mddev)
 {
        r1bio_t *r1_bio;
-       struct bio *bio;
        unsigned long flags;
        conf_t *conf = mddev->private;
        struct list_head *head = &conf->retry_list;
-       mdk_rdev_t *rdev;
        struct blk_plug plug;
 
        md_check_recovery(mddev);
 
        blk_start_plug(&plug);
        for (;;) {
-               char b[BDEVNAME_SIZE];
 
                if (atomic_read(&mddev->plug_cnt) == 0)
                        flush_pending_writes(conf);
@@ -1539,62 +2031,26 @@ static void raid1d(mddev_t *mddev)
 
                mddev = r1_bio->mddev;
                conf = mddev->private;
-               if (test_bit(R1BIO_IsSync, &r1_bio->state))
-                       sync_request_write(mddev, r1_bio);
-               else {
-                       int disk;
-
-                       /* we got a read error. Maybe the drive is bad.  Maybe just
-                        * the block and we can fix it.
-                        * We freeze all other IO, and try reading the block from
-                        * other devices.  When we find one, we re-write
-                        * and check it that fixes the read error.
-                        * This is all done synchronously while the array is
-                        * frozen
+               if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
+                       if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
+                           test_bit(R1BIO_WriteError, &r1_bio->state))
+                               handle_sync_write_finished(conf, r1_bio);
+                       else
+                               sync_request_write(mddev, r1_bio);
+               } else if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
+                          test_bit(R1BIO_WriteError, &r1_bio->state))
+                       handle_write_finished(conf, r1_bio);
+               else if (test_bit(R1BIO_ReadError, &r1_bio->state))
+                       handle_read_error(conf, r1_bio);
+               else
+                       /* just a partial read to be scheduled from separate
+                        * context
                         */
-                       if (mddev->ro == 0) {
-                               freeze_array(conf);
-                               fix_read_error(conf, r1_bio->read_disk,
-                                              r1_bio->sector,
-                                              r1_bio->sectors);
-                               unfreeze_array(conf);
-                       } else
-                               md_error(mddev,
-                                        conf->mirrors[r1_bio->read_disk].rdev);
-
-                       bio = r1_bio->bios[r1_bio->read_disk];
-                       if ((disk=read_balance(conf, r1_bio)) == -1) {
-                               printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O"
-                                      " read error for block %llu\n",
-                                      mdname(mddev),
-                                      bdevname(bio->bi_bdev,b),
-                                      (unsigned long long)r1_bio->sector);
-                               raid_end_bio_io(r1_bio);
-                       } else {
-                               const unsigned long do_sync = r1_bio->master_bio->bi_rw & REQ_SYNC;
-                               r1_bio->bios[r1_bio->read_disk] =
-                                       mddev->ro ? IO_BLOCKED : NULL;
-                               r1_bio->read_disk = disk;
-                               bio_put(bio);
-                               bio = bio_clone_mddev(r1_bio->master_bio,
-                                                     GFP_NOIO, mddev);
-                               r1_bio->bios[r1_bio->read_disk] = bio;
-                               rdev = conf->mirrors[disk].rdev;
-                               if (printk_ratelimit())
-                                       printk(KERN_ERR "md/raid1:%s: redirecting sector %llu to"
-                                              " other mirror: %s\n",
-                                              mdname(mddev),
-                                              (unsigned long long)r1_bio->sector,
-                                              bdevname(rdev->bdev,b));
-                               bio->bi_sector = r1_bio->sector + rdev->data_offset;
-                               bio->bi_bdev = rdev->bdev;
-                               bio->bi_end_io = raid1_end_read_request;
-                               bio->bi_rw = READ | do_sync;
-                               bio->bi_private = r1_bio;
-                               generic_make_request(bio);
-                       }
-               }
+                       generic_make_request(r1_bio->bios[r1_bio->read_disk]);
+
                cond_resched();
+               if (mddev->flags & ~(1<<MD_CHANGE_PENDING))
+                       md_check_recovery(mddev);
        }
        blk_finish_plug(&plug);
 }
@@ -1636,6 +2092,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
        int write_targets = 0, read_targets = 0;
        sector_t sync_blocks;
        int still_degraded = 0;
+       int good_sectors = RESYNC_SECTORS;
+       int min_bad = 0; /* number of sectors that are bad in all devices */
 
        if (!conf->r1buf_pool)
                if (init_resync(conf))
@@ -1723,36 +2181,89 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 
                rdev = rcu_dereference(conf->mirrors[i].rdev);
                if (rdev == NULL ||
-                          test_bit(Faulty, &rdev->flags)) {
+                   test_bit(Faulty, &rdev->flags)) {
                        still_degraded = 1;
-                       continue;
                } else if (!test_bit(In_sync, &rdev->flags)) {
                        bio->bi_rw = WRITE;
                        bio->bi_end_io = end_sync_write;
                        write_targets ++;
                } else {
                        /* may need to read from here */
-                       bio->bi_rw = READ;
-                       bio->bi_end_io = end_sync_read;
-                       if (test_bit(WriteMostly, &rdev->flags)) {
-                               if (wonly < 0)
-                                       wonly = i;
-                       } else {
-                               if (disk < 0)
-                                       disk = i;
+                       sector_t first_bad = MaxSector;
+                       int bad_sectors;
+
+                       if (is_badblock(rdev, sector_nr, good_sectors,
+                                       &first_bad, &bad_sectors)) {
+                               if (first_bad > sector_nr)
+                                       good_sectors = first_bad - sector_nr;
+                               else {
+                                       bad_sectors -= (sector_nr - first_bad);
+                                       if (min_bad == 0 ||
+                                           min_bad > bad_sectors)
+                                               min_bad = bad_sectors;
+                               }
+                       }
+                       if (sector_nr < first_bad) {
+                               if (test_bit(WriteMostly, &rdev->flags)) {
+                                       if (wonly < 0)
+                                               wonly = i;
+                               } else {
+                                       if (disk < 0)
+                                               disk = i;
+                               }
+                               bio->bi_rw = READ;
+                               bio->bi_end_io = end_sync_read;
+                               read_targets++;
                        }
-                       read_targets++;
                }
-               atomic_inc(&rdev->nr_pending);
-               bio->bi_sector = sector_nr + rdev->data_offset;
-               bio->bi_bdev = rdev->bdev;
-               bio->bi_private = r1_bio;
+               if (bio->bi_end_io) {
+                       atomic_inc(&rdev->nr_pending);
+                       bio->bi_sector = sector_nr + rdev->data_offset;
+                       bio->bi_bdev = rdev->bdev;
+                       bio->bi_private = r1_bio;
+               }
        }
        rcu_read_unlock();
        if (disk < 0)
                disk = wonly;
        r1_bio->read_disk = disk;
 
+       if (read_targets == 0 && min_bad > 0) {
+               /* These sectors are bad on all InSync devices, so we
+                * need to mark them bad on all write targets
+                */
+               int ok = 1;
+               for (i = 0 ; i < conf->raid_disks ; i++)
+                       if (r1_bio->bios[i]->bi_end_io == end_sync_write) {
+                               mdk_rdev_t *rdev =
+                                       rcu_dereference(conf->mirrors[i].rdev);
+                               ok = rdev_set_badblocks(rdev, sector_nr,
+                                                       min_bad, 0
+                                       ) && ok;
+                       }
+               set_bit(MD_CHANGE_DEVS, &mddev->flags);
+               *skipped = 1;
+               put_buf(r1_bio);
+
+               if (!ok) {
+                       /* Cannot record the badblocks, so need to
+                        * abort the resync.
+                        * If there are multiple read targets, could just
+                        * fail the really bad ones ???
+                        */
+                       conf->recovery_disabled = mddev->recovery_disabled;
+                       set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+                       return 0;
+               } else
+                       return min_bad;
+
+       }
+       if (min_bad > 0 && min_bad < good_sectors) {
+               /* only resync enough to reach the next bad->good
+                * transition */
+               good_sectors = min_bad;
+       }
+
        if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && read_targets > 0)
                /* extra read targets are also write targets */
                write_targets += read_targets-1;
@@ -1769,6 +2280,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 
        if (max_sector > mddev->resync_max)
                max_sector = mddev->resync_max; /* Don't do IO beyond here */
+       if (max_sector > sector_nr + good_sectors)
+               max_sector = sector_nr + good_sectors;
        nr_sectors = 0;
        sync_blocks = 0;
        do {
@@ -2154,18 +2667,13 @@ static int raid1_reshape(mddev_t *mddev)
        for (d = d2 = 0; d < conf->raid_disks; d++) {
                mdk_rdev_t *rdev = conf->mirrors[d].rdev;
                if (rdev && rdev->raid_disk != d2) {
-                       char nm[20];
-                       sprintf(nm, "rd%d", rdev->raid_disk);
-                       sysfs_remove_link(&mddev->kobj, nm);
+                       sysfs_unlink_rdev(mddev, rdev);
                        rdev->raid_disk = d2;
-                       sprintf(nm, "rd%d", rdev->raid_disk);
-                       sysfs_remove_link(&mddev->kobj, nm);
-                       if (sysfs_create_link(&mddev->kobj,
-                                             &rdev->kobj, nm))
+                       sysfs_unlink_rdev(mddev, rdev);
+                       if (sysfs_link_rdev(mddev, rdev))
                                printk(KERN_WARNING
-                                      "md/raid1:%s: cannot register "
-                                      "%s\n",
-                                      mdname(mddev), nm);
+                                      "md/raid1:%s: cannot register rd%d\n",
+                                      mdname(mddev), rdev->raid_disk);
                }
                if (rdev)
                        newmirrors[d2++].rdev = rdev;
index e743a64..e0d676b 100644 (file)
@@ -48,6 +48,12 @@ struct r1_private_data_s {
                                            * (fresh device added).
                                            * Cleared when a sync completes.
                                            */
+       int                     recovery_disabled; /* when the same as
+                                                   * mddev->recovery_disabled
+                                                   * we don't allow recovery
+                                                   * to be attempted as we
+                                                   * expect a read error
+                                                   */
 
        wait_queue_head_t       wait_barrier;
 
@@ -95,7 +101,7 @@ struct r1bio_s {
 
        struct list_head        retry_list;
        /* Next two are only valid when R1BIO_BehindIO is set */
-       struct page             **behind_pages;
+       struct bio_vec          *behind_bvecs;
        int                     behind_page_count;
        /*
         * if the IO is in WRITE direction, then multiple bios are used.
@@ -110,13 +116,24 @@ struct r1bio_s {
  * correct the read error.  To keep track of bad blocks on a per-bio
  * level, we store IO_BLOCKED in the appropriate 'bios' pointer
  */
-#define IO_BLOCKED ((struct bio*)1)
+#define IO_BLOCKED ((struct bio *)1)
+/* When we successfully write to a known bad-block, we need to remove the
+ * bad-block marking which must be done from process context.  So we record
+ * the success by setting bios[n] to IO_MADE_GOOD
+ */
+#define IO_MADE_GOOD ((struct bio *)2)
+
+#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
 
 /* bits for r1bio.state */
 #define        R1BIO_Uptodate  0
 #define        R1BIO_IsSync    1
 #define        R1BIO_Degraded  2
 #define        R1BIO_BehindIO  3
+/* Set ReadError on bios that experience a readerror so that
+ * raid1d knows what to do with them.
+ */
+#define R1BIO_ReadError 4
 /* For write-behind requests, we call bi_end_io when
  * the last non-write-behind device completes, providing
  * any write was successful.  Otherwise we call when
@@ -125,6 +142,11 @@ struct r1bio_s {
  * Record that bi_end_io was called with this flag...
  */
 #define        R1BIO_Returned 6
+/* If a write for this request means we can clear some
+ * known-bad-block records, we set this flag
+ */
+#define        R1BIO_MadeGood 7
+#define        R1BIO_WriteError 8
 
 extern int md_raid1_congested(mddev_t *mddev, int bits);
 
index 6e84668..8b29cd4 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/delay.h>
 #include <linux/blkdev.h>
 #include <linux/seq_file.h>
+#include <linux/ratelimit.h>
 #include "md.h"
 #include "raid10.h"
 #include "raid0.h"
@@ -123,7 +124,14 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
        for (j = 0 ; j < nalloc; j++) {
                bio = r10_bio->devs[j].bio;
                for (i = 0; i < RESYNC_PAGES; i++) {
-                       page = alloc_page(gfp_flags);
+                       if (j == 1 && !test_bit(MD_RECOVERY_SYNC,
+                                               &conf->mddev->recovery)) {
+                               /* we can share bv_page's during recovery */
+                               struct bio *rbio = r10_bio->devs[0].bio;
+                               page = rbio->bi_io_vec[i].bv_page;
+                               get_page(page);
+                       } else
+                               page = alloc_page(gfp_flags);
                        if (unlikely(!page))
                                goto out_free_pages;
 
@@ -173,7 +181,7 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio)
 
        for (i = 0; i < conf->copies; i++) {
                struct bio **bio = & r10_bio->devs[i].bio;
-               if (*bio && *bio != IO_BLOCKED)
+               if (!BIO_SPECIAL(*bio))
                        bio_put(*bio);
                *bio = NULL;
        }
@@ -183,12 +191,6 @@ static void free_r10bio(r10bio_t *r10_bio)
 {
        conf_t *conf = r10_bio->mddev->private;
 
-       /*
-        * Wake up any possible resync thread that waits for the device
-        * to go idle.
-        */
-       allow_barrier(conf);
-
        put_all_bios(conf, r10_bio);
        mempool_free(r10_bio, conf->r10bio_pool);
 }
@@ -227,9 +229,27 @@ static void reschedule_retry(r10bio_t *r10_bio)
 static void raid_end_bio_io(r10bio_t *r10_bio)
 {
        struct bio *bio = r10_bio->master_bio;
+       int done;
+       conf_t *conf = r10_bio->mddev->private;
 
-       bio_endio(bio,
-               test_bit(R10BIO_Uptodate, &r10_bio->state) ? 0 : -EIO);
+       if (bio->bi_phys_segments) {
+               unsigned long flags;
+               spin_lock_irqsave(&conf->device_lock, flags);
+               bio->bi_phys_segments--;
+               done = (bio->bi_phys_segments == 0);
+               spin_unlock_irqrestore(&conf->device_lock, flags);
+       } else
+               done = 1;
+       if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
+               clear_bit(BIO_UPTODATE, &bio->bi_flags);
+       if (done) {
+               bio_endio(bio, 0);
+               /*
+                * Wake up any possible resync thread that waits for the device
+                * to go idle.
+                */
+               allow_barrier(conf);
+       }
        free_r10bio(r10_bio);
 }
 
@@ -244,6 +264,26 @@ static inline void update_head_pos(int slot, r10bio_t *r10_bio)
                r10_bio->devs[slot].addr + (r10_bio->sectors);
 }
 
+/*
+ * Find the disk number which triggered given bio
+ */
+static int find_bio_disk(conf_t *conf, r10bio_t *r10_bio,
+                        struct bio *bio, int *slotp)
+{
+       int slot;
+
+       for (slot = 0; slot < conf->copies; slot++)
+               if (r10_bio->devs[slot].bio == bio)
+                       break;
+
+       BUG_ON(slot == conf->copies);
+       update_head_pos(slot, r10_bio);
+
+       if (slotp)
+               *slotp = slot;
+       return r10_bio->devs[slot].devnum;
+}
+
 static void raid10_end_read_request(struct bio *bio, int error)
 {
        int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -277,34 +317,45 @@ static void raid10_end_read_request(struct bio *bio, int error)
                 * oops, read error - keep the refcount on the rdev
                 */
                char b[BDEVNAME_SIZE];
-               if (printk_ratelimit())
-                       printk(KERN_ERR "md/raid10:%s: %s: rescheduling sector %llu\n",
-                              mdname(conf->mddev),
-                              bdevname(conf->mirrors[dev].rdev->bdev,b), (unsigned long long)r10_bio->sector);
+               printk_ratelimited(KERN_ERR
+                                  "md/raid10:%s: %s: rescheduling sector %llu\n",
+                                  mdname(conf->mddev),
+                                  bdevname(conf->mirrors[dev].rdev->bdev, b),
+                                  (unsigned long long)r10_bio->sector);
+               set_bit(R10BIO_ReadError, &r10_bio->state);
                reschedule_retry(r10_bio);
        }
 }
 
+static void close_write(r10bio_t *r10_bio)
+{
+       /* clear the bitmap if all writes complete successfully */
+       bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
+                       r10_bio->sectors,
+                       !test_bit(R10BIO_Degraded, &r10_bio->state),
+                       0);
+       md_write_end(r10_bio->mddev);
+}
+
 static void raid10_end_write_request(struct bio *bio, int error)
 {
        int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
        r10bio_t *r10_bio = bio->bi_private;
-       int slot, dev;
+       int dev;
+       int dec_rdev = 1;
        conf_t *conf = r10_bio->mddev->private;
+       int slot;
 
-       for (slot = 0; slot < conf->copies; slot++)
-               if (r10_bio->devs[slot].bio == bio)
-                       break;
-       dev = r10_bio->devs[slot].devnum;
+       dev = find_bio_disk(conf, r10_bio, bio, &slot);
 
        /*
         * this branch is our 'one mirror IO has finished' event handler:
         */
        if (!uptodate) {
-               md_error(r10_bio->mddev, conf->mirrors[dev].rdev);
-               /* an I/O failed, we can't clear the bitmap */
-               set_bit(R10BIO_Degraded, &r10_bio->state);
-       } else
+               set_bit(WriteErrorSeen, &conf->mirrors[dev].rdev->flags);
+               set_bit(R10BIO_WriteError, &r10_bio->state);
+               dec_rdev = 0;
+       } else {
                /*
                 * Set R10BIO_Uptodate in our master bio, so that
                 * we will return a good error code for to the higher
@@ -314,9 +365,22 @@ static void raid10_end_write_request(struct bio *bio, int error)
                 * user-side. So if something waits for IO, then it will
                 * wait for the 'master' bio.
                 */
+               sector_t first_bad;
+               int bad_sectors;
+
                set_bit(R10BIO_Uptodate, &r10_bio->state);
 
-       update_head_pos(slot, r10_bio);
+               /* Maybe we can clear some bad blocks. */
+               if (is_badblock(conf->mirrors[dev].rdev,
+                               r10_bio->devs[slot].addr,
+                               r10_bio->sectors,
+                               &first_bad, &bad_sectors)) {
+                       bio_put(bio);
+                       r10_bio->devs[slot].bio = IO_MADE_GOOD;
+                       dec_rdev = 0;
+                       set_bit(R10BIO_MadeGood, &r10_bio->state);
+               }
+       }
 
        /*
         *
@@ -324,16 +388,18 @@ static void raid10_end_write_request(struct bio *bio, int error)
         * already.
         */
        if (atomic_dec_and_test(&r10_bio->remaining)) {
-               /* clear the bitmap if all writes complete successfully */
-               bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
-                               r10_bio->sectors,
-                               !test_bit(R10BIO_Degraded, &r10_bio->state),
-                               0);
-               md_write_end(r10_bio->mddev);
-               raid_end_bio_io(r10_bio);
+               if (test_bit(R10BIO_WriteError, &r10_bio->state))
+                       reschedule_retry(r10_bio);
+               else {
+                       close_write(r10_bio);
+                       if (test_bit(R10BIO_MadeGood, &r10_bio->state))
+                               reschedule_retry(r10_bio);
+                       else
+                               raid_end_bio_io(r10_bio);
+               }
        }
-
-       rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
+       if (dec_rdev)
+               rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
 }
 
 
@@ -484,11 +550,12 @@ static int raid10_mergeable_bvec(struct request_queue *q,
  * FIXME: possibly should rethink readbalancing and do it differently
  * depending on near_copies / far_copies geometry.
  */
-static int read_balance(conf_t *conf, r10bio_t *r10_bio)
+static int read_balance(conf_t *conf, r10bio_t *r10_bio, int *max_sectors)
 {
        const sector_t this_sector = r10_bio->sector;
        int disk, slot;
-       const int sectors = r10_bio->sectors;
+       int sectors = r10_bio->sectors;
+       int best_good_sectors;
        sector_t new_distance, best_dist;
        mdk_rdev_t *rdev;
        int do_balance;
@@ -497,8 +564,10 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
        raid10_find_phys(conf, r10_bio);
        rcu_read_lock();
 retry:
+       sectors = r10_bio->sectors;
        best_slot = -1;
        best_dist = MaxSector;
+       best_good_sectors = 0;
        do_balance = 1;
        /*
         * Check if we can balance. We can balance on the whole
@@ -511,6 +580,10 @@ retry:
                do_balance = 0;
 
        for (slot = 0; slot < conf->copies ; slot++) {
+               sector_t first_bad;
+               int bad_sectors;
+               sector_t dev_sector;
+
                if (r10_bio->devs[slot].bio == IO_BLOCKED)
                        continue;
                disk = r10_bio->devs[slot].devnum;
@@ -520,6 +593,37 @@ retry:
                if (!test_bit(In_sync, &rdev->flags))
                        continue;
 
+               dev_sector = r10_bio->devs[slot].addr;
+               if (is_badblock(rdev, dev_sector, sectors,
+                               &first_bad, &bad_sectors)) {
+                       if (best_dist < MaxSector)
+                               /* Already have a better slot */
+                               continue;
+                       if (first_bad <= dev_sector) {
+                               /* Cannot read here.  If this is the
+                                * 'primary' device, then we must not read
+                                * beyond 'bad_sectors' from another device.
+                                */
+                               bad_sectors -= (dev_sector - first_bad);
+                               if (!do_balance && sectors > bad_sectors)
+                                       sectors = bad_sectors;
+                               if (best_good_sectors > sectors)
+                                       best_good_sectors = sectors;
+                       } else {
+                               sector_t good_sectors =
+                                       first_bad - dev_sector;
+                               if (good_sectors > best_good_sectors) {
+                                       best_good_sectors = good_sectors;
+                                       best_slot = slot;
+                               }
+                               if (!do_balance)
+                                       /* Must read from here */
+                                       break;
+                       }
+                       continue;
+               } else
+                       best_good_sectors = sectors;
+
                if (!do_balance)
                        break;
 
@@ -561,6 +665,7 @@ retry:
        } else
                disk = -1;
        rcu_read_unlock();
+       *max_sectors = best_good_sectors;
 
        return disk;
 }
@@ -734,6 +839,8 @@ static int make_request(mddev_t *mddev, struct bio * bio)
        unsigned long flags;
        mdk_rdev_t *blocked_rdev;
        int plugged;
+       int sectors_handled;
+       int max_sectors;
 
        if (unlikely(bio->bi_rw & REQ_FLUSH)) {
                md_flush_request(mddev, bio);
@@ -808,12 +915,26 @@ static int make_request(mddev_t *mddev, struct bio * bio)
        r10_bio->sector = bio->bi_sector;
        r10_bio->state = 0;
 
+       /* We might need to issue multiple reads to different
+        * devices if there are bad blocks around, so we keep
+        * track of the number of reads in bio->bi_phys_segments.
+        * If this is 0, there is only one r10_bio and no locking
+        * will be needed when the request completes.  If it is
+        * non-zero, then it is the number of not-completed requests.
+        */
+       bio->bi_phys_segments = 0;
+       clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+
        if (rw == READ) {
                /*
                 * read balancing logic:
                 */
-               int disk = read_balance(conf, r10_bio);
-               int slot = r10_bio->read_slot;
+               int disk;
+               int slot;
+
+read_again:
+               disk = read_balance(conf, r10_bio, &max_sectors);
+               slot = r10_bio->read_slot;
                if (disk < 0) {
                        raid_end_bio_io(r10_bio);
                        return 0;
@@ -821,6 +942,8 @@ static int make_request(mddev_t *mddev, struct bio * bio)
                mirror = conf->mirrors + disk;
 
                read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+               md_trim_bio(read_bio, r10_bio->sector - bio->bi_sector,
+                           max_sectors);
 
                r10_bio->devs[slot].bio = read_bio;
 
@@ -831,7 +954,37 @@ static int make_request(mddev_t *mddev, struct bio * bio)
                read_bio->bi_rw = READ | do_sync;
                read_bio->bi_private = r10_bio;
 
-               generic_make_request(read_bio);
+               if (max_sectors < r10_bio->sectors) {
+                       /* Could not read all from this device, so we will
+                        * need another r10_bio.
+                        */
+                       sectors_handled = (r10_bio->sectors + max_sectors
+                                          - bio->bi_sector);
+                       r10_bio->sectors = max_sectors;
+                       spin_lock_irq(&conf->device_lock);
+                       if (bio->bi_phys_segments == 0)
+                               bio->bi_phys_segments = 2;
+                       else
+                               bio->bi_phys_segments++;
+                       spin_unlock(&conf->device_lock);
+                       /* Cannot call generic_make_request directly
+                        * as that will be queued in __generic_make_request
+                        * and subsequent mempool_alloc might block
+                        * waiting for it.  so hand bio over to raid10d.
+                        */
+                       reschedule_retry(r10_bio);
+
+                       r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+                       r10_bio->master_bio = bio;
+                       r10_bio->sectors = ((bio->bi_size >> 9)
+                                           - sectors_handled);
+                       r10_bio->state = 0;
+                       r10_bio->mddev = mddev;
+                       r10_bio->sector = bio->bi_sector + sectors_handled;
+                       goto read_again;
+               } else
+                       generic_make_request(read_bio);
                return 0;
        }
 
@@ -841,13 +994,22 @@ static int make_request(mddev_t *mddev, struct bio * bio)
        /* first select target devices under rcu_lock and
         * inc refcount on their rdev.  Record them by setting
         * bios[x] to bio
+        * If there are known/acknowledged bad blocks on any device
+        * on which we have seen a write error, we want to avoid
+        * writing to those blocks.  This potentially requires several
+        * writes to write around the bad blocks.  Each set of writes
+        * gets its own r10_bio with a set of bios attached.  The number
+        * of r10_bios is recorded in bio->bi_phys_segments just as with
+        * the read case.
         */
        plugged = mddev_check_plugged(mddev);
 
        raid10_find_phys(conf, r10_bio);
- retry_write:
+retry_write:
        blocked_rdev = NULL;
        rcu_read_lock();
+       max_sectors = r10_bio->sectors;
+
        for (i = 0;  i < conf->copies; i++) {
                int d = r10_bio->devs[i].devnum;
                mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[d].rdev);
@@ -856,13 +1018,55 @@ static int make_request(mddev_t *mddev, struct bio * bio)
                        blocked_rdev = rdev;
                        break;
                }
-               if (rdev && !test_bit(Faulty, &rdev->flags)) {
-                       atomic_inc(&rdev->nr_pending);
-                       r10_bio->devs[i].bio = bio;
-               } else {
-                       r10_bio->devs[i].bio = NULL;
+               r10_bio->devs[i].bio = NULL;
+               if (!rdev || test_bit(Faulty, &rdev->flags)) {
                        set_bit(R10BIO_Degraded, &r10_bio->state);
+                       continue;
                }
+               if (test_bit(WriteErrorSeen, &rdev->flags)) {
+                       sector_t first_bad;
+                       sector_t dev_sector = r10_bio->devs[i].addr;
+                       int bad_sectors;
+                       int is_bad;
+
+                       is_bad = is_badblock(rdev, dev_sector,
+                                            max_sectors,
+                                            &first_bad, &bad_sectors);
+                       if (is_bad < 0) {
+                               /* Mustn't write here until the bad block
+                                * is acknowledged
+                                */
+                               atomic_inc(&rdev->nr_pending);
+                               set_bit(BlockedBadBlocks, &rdev->flags);
+                               blocked_rdev = rdev;
+                               break;
+                       }
+                       if (is_bad && first_bad <= dev_sector) {
+                               /* Cannot write here at all */
+                               bad_sectors -= (dev_sector - first_bad);
+                               if (bad_sectors < max_sectors)
+                                       /* Mustn't write more than bad_sectors
+                                        * to other devices yet
+                                        */
+                                       max_sectors = bad_sectors;
+                               /* We don't set R10BIO_Degraded as that
+                                * only applies if the disk is missing,
+                                * so it might be re-added, and we want to
+                                * know to recover this chunk.
+                                * In this case the device is here, and the
+                                * fact that this chunk is not in-sync is
+                                * recorded in the bad block log.
+                                */
+                               continue;
+                       }
+                       if (is_bad) {
+                               int good_sectors = first_bad - dev_sector;
+                               if (good_sectors < max_sectors)
+                                       max_sectors = good_sectors;
+                       }
+               }
+               r10_bio->devs[i].bio = bio;
+               atomic_inc(&rdev->nr_pending);
        }
        rcu_read_unlock();
 
@@ -882,8 +1086,22 @@ static int make_request(mddev_t *mddev, struct bio * bio)
                goto retry_write;
        }
 
+       if (max_sectors < r10_bio->sectors) {
+               /* We are splitting this into multiple parts, so
+                * we need to prepare for allocating another r10_bio.
+                */
+               r10_bio->sectors = max_sectors;
+               spin_lock_irq(&conf->device_lock);
+               if (bio->bi_phys_segments == 0)
+                       bio->bi_phys_segments = 2;
+               else
+                       bio->bi_phys_segments++;
+               spin_unlock_irq(&conf->device_lock);
+       }
+       sectors_handled = r10_bio->sector + max_sectors - bio->bi_sector;
+
        atomic_set(&r10_bio->remaining, 1);
-       bitmap_startwrite(mddev->bitmap, bio->bi_sector, r10_bio->sectors, 0);
+       bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);
 
        for (i = 0; i < conf->copies; i++) {
                struct bio *mbio;
@@ -892,10 +1110,12 @@ static int make_request(mddev_t *mddev, struct bio * bio)
                        continue;
 
                mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+               md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
+                           max_sectors);
                r10_bio->devs[i].bio = mbio;
 
-               mbio->bi_sector = r10_bio->devs[i].addr+
-                       conf->mirrors[d].rdev->data_offset;
+               mbio->bi_sector = (r10_bio->devs[i].addr+
+                                  conf->mirrors[d].rdev->data_offset);
                mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
                mbio->bi_end_io = raid10_end_write_request;
                mbio->bi_rw = WRITE | do_sync | do_fua;
@@ -920,6 +1140,21 @@ static int make_request(mddev_t *mddev, struct bio * bio)
        /* In case raid10d snuck in to freeze_array */
        wake_up(&conf->wait_barrier);
 
+       if (sectors_handled < (bio->bi_size >> 9)) {
+               /* We need another r10_bio.  It has already been counted
+                * in bio->bi_phys_segments.
+                */
+               r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+               r10_bio->master_bio = bio;
+               r10_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
+
+               r10_bio->mddev = mddev;
+               r10_bio->sector = bio->bi_sector + sectors_handled;
+               r10_bio->state = 0;
+               goto retry_write;
+       }
+
        if (do_sync || !mddev->bitmap || !plugged)
                md_wakeup_thread(mddev->thread);
        return 0;
@@ -949,6 +1184,30 @@ static void status(struct seq_file *seq, mddev_t *mddev)
        seq_printf(seq, "]");
 }
 
+/* check if there are enough drives for
+ * every block to appear on at least one.
+ * Don't consider the device numbered 'ignore'
+ * as we might be about to remove it.
+ */
+static int enough(conf_t *conf, int ignore)
+{
+       int first = 0;
+
+       do {
+               int n = conf->copies;
+               int cnt = 0;
+               while (n--) {
+                       if (conf->mirrors[first].rdev &&
+                           first != ignore)
+                               cnt++;
+                       first = (first+1) % conf->raid_disks;
+               }
+               if (cnt == 0)
+                       return 0;
+       } while (first != 0);
+       return 1;
+}
+
 static void error(mddev_t *mddev, mdk_rdev_t *rdev)
 {
        char b[BDEVNAME_SIZE];
@@ -961,13 +1220,9 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
         * else mark the drive as failed
         */
        if (test_bit(In_sync, &rdev->flags)
-           && conf->raid_disks-mddev->degraded == 1)
+           && !enough(conf, rdev->raid_disk))
                /*
                 * Don't fail the drive, just return an IO error.
-                * The test should really be more sophisticated than
-                * "working_disks == 1", but it isn't critical, and
-                * can wait until we do more sophisticated "is the drive
-                * really dead" tests...
                 */
                return;
        if (test_and_clear_bit(In_sync, &rdev->flags)) {
@@ -980,6 +1235,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
                 */
                set_bit(MD_RECOVERY_INTR, &mddev->recovery);
        }
+       set_bit(Blocked, &rdev->flags);
        set_bit(Faulty, &rdev->flags);
        set_bit(MD_CHANGE_DEVS, &mddev->flags);
        printk(KERN_ALERT
@@ -1022,27 +1278,6 @@ static void close_sync(conf_t *conf)
        conf->r10buf_pool = NULL;
 }
 
-/* check if there are enough drives for
- * every block to appear on atleast one
- */
-static int enough(conf_t *conf)
-{
-       int first = 0;
-
-       do {
-               int n = conf->copies;
-               int cnt = 0;
-               while (n--) {
-                       if (conf->mirrors[first].rdev)
-                               cnt++;
-                       first = (first+1) % conf->raid_disks;
-               }
-               if (cnt == 0)
-                       return 0;
-       } while (first != 0);
-       return 1;
-}
-
 static int raid10_spare_active(mddev_t *mddev)
 {
        int i;
@@ -1078,7 +1313,6 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
        conf_t *conf = mddev->private;
        int err = -EEXIST;
        int mirror;
-       mirror_info_t *p;
        int first = 0;
        int last = conf->raid_disks - 1;
 
@@ -1087,44 +1321,47 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
                 * very different from resync
                 */
                return -EBUSY;
-       if (!enough(conf))
+       if (!enough(conf, -1))
                return -EINVAL;
 
        if (rdev->raid_disk >= 0)
                first = last = rdev->raid_disk;
 
-       if (rdev->saved_raid_disk >= 0 &&
-           rdev->saved_raid_disk >= first &&
+       if (rdev->saved_raid_disk >= first &&
            conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
                mirror = rdev->saved_raid_disk;
        else
                mirror = first;
-       for ( ; mirror <= last ; mirror++)
-               if ( !(p=conf->mirrors+mirror)->rdev) {
-
-                       disk_stack_limits(mddev->gendisk, rdev->bdev,
-                                         rdev->data_offset << 9);
-                       /* as we don't honour merge_bvec_fn, we must
-                        * never risk violating it, so limit
-                        * ->max_segments to one lying with a single
-                        * page, as a one page request is never in
-                        * violation.
-                        */
-                       if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
-                               blk_queue_max_segments(mddev->queue, 1);
-                               blk_queue_segment_boundary(mddev->queue,
-                                                          PAGE_CACHE_SIZE - 1);
-                       }
+       for ( ; mirror <= last ; mirror++) {
+               mirror_info_t *p = &conf->mirrors[mirror];
+               if (p->recovery_disabled == mddev->recovery_disabled)
+                       continue;
+               if (!p->rdev)
+                       continue;
 
-                       p->head_position = 0;
-                       rdev->raid_disk = mirror;
-                       err = 0;
-                       if (rdev->saved_raid_disk != mirror)
-                               conf->fullsync = 1;
-                       rcu_assign_pointer(p->rdev, rdev);
-                       break;
+               disk_stack_limits(mddev->gendisk, rdev->bdev,
+                                 rdev->data_offset << 9);
+               /* as we don't honour merge_bvec_fn, we must
+                * never risk violating it, so limit
+                * ->max_segments to one lying with a single
+                * page, as a one page request is never in
+                * violation.
+                */
+               if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
+                       blk_queue_max_segments(mddev->queue, 1);
+                       blk_queue_segment_boundary(mddev->queue,
+                                                  PAGE_CACHE_SIZE - 1);
                }
 
+               p->head_position = 0;
+               rdev->raid_disk = mirror;
+               err = 0;
+               if (rdev->saved_raid_disk != mirror)
+                       conf->fullsync = 1;
+               rcu_assign_pointer(p->rdev, rdev);
+               break;
+       }
+
        md_integrity_add_rdev(rdev, mddev);
        print_conf(conf);
        return err;
@@ -1149,7 +1386,8 @@ static int raid10_remove_disk(mddev_t *mddev, int number)
                 * is not possible.
                 */
                if (!test_bit(Faulty, &rdev->flags) &&
-                   enough(conf)) {
+                   mddev->recovery_disabled != p->recovery_disabled &&
+                   enough(conf, -1)) {
                        err = -EBUSY;
                        goto abort;
                }
@@ -1174,24 +1412,18 @@ static void end_sync_read(struct bio *bio, int error)
 {
        r10bio_t *r10_bio = bio->bi_private;
        conf_t *conf = r10_bio->mddev->private;
-       int i,d;
+       int d;
 
-       for (i=0; i<conf->copies; i++)
-               if (r10_bio->devs[i].bio == bio)
-                       break;
-       BUG_ON(i == conf->copies);
-       update_head_pos(i, r10_bio);
-       d = r10_bio->devs[i].devnum;
+       d = find_bio_disk(conf, r10_bio, bio, NULL);
 
        if (test_bit(BIO_UPTODATE, &bio->bi_flags))
                set_bit(R10BIO_Uptodate, &r10_bio->state);
-       else {
+       else
+               /* The write handler will notice the lack of
+                * R10BIO_Uptodate and record any errors etc
+                */
                atomic_add(r10_bio->sectors,
                           &conf->mirrors[d].rdev->corrected_errors);
-               if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
-                       md_error(r10_bio->mddev,
-                                conf->mirrors[d].rdev);
-       }
 
        /* for reconstruct, we always reschedule after a read.
         * for resync, only after all reads
@@ -1206,40 +1438,60 @@ static void end_sync_read(struct bio *bio, int error)
        }
 }
 
-static void end_sync_write(struct bio *bio, int error)
+static void end_sync_request(r10bio_t *r10_bio)
 {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-       r10bio_t *r10_bio = bio->bi_private;
        mddev_t *mddev = r10_bio->mddev;
-       conf_t *conf = mddev->private;
-       int i,d;
-
-       for (i = 0; i < conf->copies; i++)
-               if (r10_bio->devs[i].bio == bio)
-                       break;
-       d = r10_bio->devs[i].devnum;
 
-       if (!uptodate)
-               md_error(mddev, conf->mirrors[d].rdev);
-
-       update_head_pos(i, r10_bio);
-
-       rdev_dec_pending(conf->mirrors[d].rdev, mddev);
        while (atomic_dec_and_test(&r10_bio->remaining)) {
                if (r10_bio->master_bio == NULL) {
                        /* the primary of several recovery bios */
                        sector_t s = r10_bio->sectors;
-                       put_buf(r10_bio);
+                       if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
+                           test_bit(R10BIO_WriteError, &r10_bio->state))
+                               reschedule_retry(r10_bio);
+                       else
+                               put_buf(r10_bio);
                        md_done_sync(mddev, s, 1);
                        break;
                } else {
                        r10bio_t *r10_bio2 = (r10bio_t *)r10_bio->master_bio;
-                       put_buf(r10_bio);
+                       if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
+                           test_bit(R10BIO_WriteError, &r10_bio->state))
+                               reschedule_retry(r10_bio);
+                       else
+                               put_buf(r10_bio);
                        r10_bio = r10_bio2;
                }
        }
 }
 
+static void end_sync_write(struct bio *bio, int error)
+{
+       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+       r10bio_t *r10_bio = bio->bi_private;
+       mddev_t *mddev = r10_bio->mddev;
+       conf_t *conf = mddev->private;
+       int d;
+       sector_t first_bad;
+       int bad_sectors;
+       int slot;
+
+       d = find_bio_disk(conf, r10_bio, bio, &slot);
+
+       if (!uptodate) {
+               set_bit(WriteErrorSeen, &conf->mirrors[d].rdev->flags);
+               set_bit(R10BIO_WriteError, &r10_bio->state);
+       } else if (is_badblock(conf->mirrors[d].rdev,
+                            r10_bio->devs[slot].addr,
+                            r10_bio->sectors,
+                            &first_bad, &bad_sectors))
+               set_bit(R10BIO_MadeGood, &r10_bio->state);
+
+       rdev_dec_pending(conf->mirrors[d].rdev, mddev);
+
+       end_sync_request(r10_bio);
+}
+
 /*
  * Note: sync and recover and handled very differently for raid10
  * This code is for resync.
@@ -1299,11 +1551,12 @@ static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio)
                        if (j == vcnt)
                                continue;
                        mddev->resync_mismatches += r10_bio->sectors;
+                       if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
+                               /* Don't fix anything. */
+                               continue;
                }
-               if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
-                       /* Don't fix anything. */
-                       continue;
-               /* Ok, we need to write this bio
+               /* Ok, we need to write this bio, either to correct an
+                * inconsistency or to correct an unreadable block.
                 * First we need to fixup bv_offset, bv_len and
                 * bi_vecs, as the read request might have corrupted these
                 */
@@ -1355,32 +1608,107 @@ done:
  * The second for writing.
  *
  */
+static void fix_recovery_read_error(r10bio_t *r10_bio)
+{
+       /* We got a read error during recovery.
+        * We repeat the read in smaller page-sized sections.
+        * If a read succeeds, write it to the new device or record
+        * a bad block if we cannot.
+        * If a read fails, record a bad block on both old and
+        * new devices.
+        */
+       mddev_t *mddev = r10_bio->mddev;
+       conf_t *conf = mddev->private;
+       struct bio *bio = r10_bio->devs[0].bio;
+       sector_t sect = 0;
+       int sectors = r10_bio->sectors;
+       int idx = 0;
+       int dr = r10_bio->devs[0].devnum; /* device we read from */
+       int dw = r10_bio->devs[1].devnum; /* device being recovered */
+
+       while (sectors) {
+               int s = sectors;
+               mdk_rdev_t *rdev;
+               sector_t addr;
+               int ok;
+
+               if (s > (PAGE_SIZE>>9))
+                       s = PAGE_SIZE >> 9;
+
+               rdev = conf->mirrors[dr].rdev;
+               addr = r10_bio->devs[0].addr + sect; /* was ',' (comma operator) - typo */
+               ok = sync_page_io(rdev,
+                                 addr,
+                                 s << 9,
+                                 bio->bi_io_vec[idx].bv_page,
+                                 READ, false);
+               if (ok) {
+                       rdev = conf->mirrors[dw].rdev;
+                       addr = r10_bio->devs[1].addr + sect;
+                       ok = sync_page_io(rdev,
+                                         addr,
+                                         s << 9,
+                                         bio->bi_io_vec[idx].bv_page,
+                                         WRITE, false);
+                       if (!ok)
+                               set_bit(WriteErrorSeen, &rdev->flags);
+               }
+               if (!ok) {
+                       /* We don't worry if we cannot set a bad block -
+                        * it really is bad so there is no loss in not
+                        * recording it yet
+                        */
+                       rdev_set_badblocks(rdev, addr, s, 0);
+
+                       if (rdev != conf->mirrors[dw].rdev) {
+                               /* need bad block on destination too */
+                               mdk_rdev_t *rdev2 = conf->mirrors[dw].rdev;
+                               addr = r10_bio->devs[1].addr + sect;
+                               ok = rdev_set_badblocks(rdev2, addr, s, 0);
+                               if (!ok) {
+                                       /* just abort the recovery */
+                                       printk(KERN_NOTICE
+                                              "md/raid10:%s: recovery aborted"
+                                              " due to read error\n",
+                                              mdname(mddev));
+
+                                       conf->mirrors[dw].recovery_disabled
+                                               = mddev->recovery_disabled;
+                                       set_bit(MD_RECOVERY_INTR,
+                                               &mddev->recovery);
+                                       break;
+                               }
+                       }
+               }
+
+               sectors -= s;
+               sect += s;
+               idx++;
+       }
+}
 
 static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio)
 {
        conf_t *conf = mddev->private;
-       int i, d;
-       struct bio *bio, *wbio;
+       int d;
+       struct bio *wbio;
 
+       if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) {
+               fix_recovery_read_error(r10_bio); /* retry page-by-page, record bad blocks */
+               end_sync_request(r10_bio);
+               return;
+       }
 
-       /* move the pages across to the second bio
+       /*
+        * share the pages with the first bio
         * and submit the write request
         */
-       bio = r10_bio->devs[0].bio;
        wbio = r10_bio->devs[1].bio;
-       for (i=0; i < wbio->bi_vcnt; i++) {
-               struct page *p = bio->bi_io_vec[i].bv_page;
-               bio->bi_io_vec[i].bv_page = wbio->bi_io_vec[i].bv_page;
-               wbio->bi_io_vec[i].bv_page = p;
-       }
        d = r10_bio->devs[1].devnum;
 
        atomic_inc(&conf->mirrors[d].rdev->nr_pending);
        md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9);
-       if (test_bit(R10BIO_Uptodate, &r10_bio->state))
-               generic_make_request(wbio);
-       else
-               bio_endio(wbio, -EIO);
+       generic_make_request(wbio);
 }
 
 
@@ -1421,6 +1749,26 @@ static void check_decay_read_errors(mddev_t *mddev, mdk_rdev_t *rdev)
                atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
 }
 
+static int r10_sync_page_io(mdk_rdev_t *rdev, sector_t sector,
+                           int sectors, struct page *page, int rw)
+{
+       sector_t first_bad;
+       int bad_sectors;
+
+       if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors)
+           && (rw == READ || test_bit(WriteErrorSeen, &rdev->flags)))
+               return -1; /* range overlaps a recorded bad block: refuse the I/O */
+       if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
+               /* success */
+               return 1;
+       if (rw == WRITE)
+               set_bit(WriteErrorSeen, &rdev->flags);
+       /* need to record an error - either for the block or the device */
+       if (!rdev_set_badblocks(rdev, sector, sectors, 0))
+               md_error(rdev->mddev, rdev); /* bad-block table full: fail the device */
+       return 0;
+}
+
 /*
  * This is a kernel thread which:
  *
@@ -1476,10 +1824,15 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
 
                rcu_read_lock();
                do {
+                       sector_t first_bad;
+                       int bad_sectors;
+
                        d = r10_bio->devs[sl].devnum;
                        rdev = rcu_dereference(conf->mirrors[d].rdev);
                        if (rdev &&
-                           test_bit(In_sync, &rdev->flags)) {
+                           test_bit(In_sync, &rdev->flags) &&
+                           is_badblock(rdev, r10_bio->devs[sl].addr + sect, s,
+                                       &first_bad, &bad_sectors) == 0) {
                                atomic_inc(&rdev->nr_pending);
                                rcu_read_unlock();
                                success = sync_page_io(rdev,
@@ -1499,9 +1852,19 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
                rcu_read_unlock();
 
                if (!success) {
-                       /* Cannot read from anywhere -- bye bye array */
+                       /* Cannot read from anywhere, just mark the block
+                        * as bad on the first device to discourage future
+                        * reads.
+                        */
                        int dn = r10_bio->devs[r10_bio->read_slot].devnum;
-                       md_error(mddev, conf->mirrors[dn].rdev);
+                       rdev = conf->mirrors[dn].rdev;
+
+                       if (!rdev_set_badblocks(
+                                   rdev,
+                                   r10_bio->devs[r10_bio->read_slot].addr
+                                   + sect,
+                                   s, 0))
+                               md_error(mddev, rdev);
                        break;
                }
 
@@ -1516,80 +1879,82 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
                        sl--;
                        d = r10_bio->devs[sl].devnum;
                        rdev = rcu_dereference(conf->mirrors[d].rdev);
-                       if (rdev &&
-                           test_bit(In_sync, &rdev->flags)) {
-                               atomic_inc(&rdev->nr_pending);
-                               rcu_read_unlock();
-                               atomic_add(s, &rdev->corrected_errors);
-                               if (sync_page_io(rdev,
-                                                r10_bio->devs[sl].addr +
-                                                sect,
-                                                s<<9, conf->tmppage, WRITE, false)
-                                   == 0) {
-                                       /* Well, this device is dead */
-                                       printk(KERN_NOTICE
-                                              "md/raid10:%s: read correction "
-                                              "write failed"
-                                              " (%d sectors at %llu on %s)\n",
-                                              mdname(mddev), s,
-                                              (unsigned long long)(
-                                                      sect + rdev->data_offset),
-                                              bdevname(rdev->bdev, b));
-                                       printk(KERN_NOTICE "md/raid10:%s: %s: failing "
-                                              "drive\n",
-                                              mdname(mddev),
-                                              bdevname(rdev->bdev, b));
-                                       md_error(mddev, rdev);
-                               }
-                               rdev_dec_pending(rdev, mddev);
-                               rcu_read_lock();
+                       if (!rdev ||
+                           !test_bit(In_sync, &rdev->flags))
+                               continue;
+
+                       atomic_inc(&rdev->nr_pending);
+                       rcu_read_unlock();
+                       if (r10_sync_page_io(rdev,
+                                            r10_bio->devs[sl].addr +
+                                            sect,
+                                            s<<9, conf->tmppage, WRITE)
+                           == 0) {
+                               /* Well, this device is dead */
+                               printk(KERN_NOTICE
+                                      "md/raid10:%s: read correction "
+                                      "write failed"
+                                      " (%d sectors at %llu on %s)\n",
+                                      mdname(mddev), s,
+                                      (unsigned long long)(
+                                              sect + rdev->data_offset),
+                                      bdevname(rdev->bdev, b));
+                               printk(KERN_NOTICE "md/raid10:%s: %s: failing "
+                                      "drive\n",
+                                      mdname(mddev),
+                                      bdevname(rdev->bdev, b));
                        }
+                       rdev_dec_pending(rdev, mddev);
+                       rcu_read_lock();
                }
                sl = start;
                while (sl != r10_bio->read_slot) {
+                       char b[BDEVNAME_SIZE];
 
                        if (sl==0)
                                sl = conf->copies;
                        sl--;
                        d = r10_bio->devs[sl].devnum;
                        rdev = rcu_dereference(conf->mirrors[d].rdev);
-                       if (rdev &&
-                           test_bit(In_sync, &rdev->flags)) {
-                               char b[BDEVNAME_SIZE];
-                               atomic_inc(&rdev->nr_pending);
-                               rcu_read_unlock();
-                               if (sync_page_io(rdev,
-                                                r10_bio->devs[sl].addr +
-                                                sect,
-                                                s<<9, conf->tmppage,
-                                                READ, false) == 0) {
-                                       /* Well, this device is dead */
-                                       printk(KERN_NOTICE
-                                              "md/raid10:%s: unable to read back "
-                                              "corrected sectors"
-                                              " (%d sectors at %llu on %s)\n",
-                                              mdname(mddev), s,
-                                              (unsigned long long)(
-                                                      sect + rdev->data_offset),
-                                              bdevname(rdev->bdev, b));
-                                       printk(KERN_NOTICE "md/raid10:%s: %s: failing drive\n",
-                                              mdname(mddev),
-                                              bdevname(rdev->bdev, b));
-
-                                       md_error(mddev, rdev);
-                               } else {
-                                       printk(KERN_INFO
-                                              "md/raid10:%s: read error corrected"
-                                              " (%d sectors at %llu on %s)\n",
-                                              mdname(mddev), s,
-                                              (unsigned long long)(
-                                                      sect + rdev->data_offset),
-                                              bdevname(rdev->bdev, b));
-                               }
+                       if (!rdev ||
+                           !test_bit(In_sync, &rdev->flags))
+                               continue;
 
-                               rdev_dec_pending(rdev, mddev);
-                               rcu_read_lock();
+                       atomic_inc(&rdev->nr_pending);
+                       rcu_read_unlock();
+                       switch (r10_sync_page_io(rdev,
+                                            r10_bio->devs[sl].addr +
+                                            sect,
+                                            s<<9, conf->tmppage,
+                                                READ)) {
+                       case 0:
+                               /* Well, this device is dead */
+                               printk(KERN_NOTICE
+                                      "md/raid10:%s: unable to read back "
+                                      "corrected sectors"
+                                      " (%d sectors at %llu on %s)\n",
+                                      mdname(mddev), s,
+                                      (unsigned long long)(
+                                              sect + rdev->data_offset),
+                                      bdevname(rdev->bdev, b));
+                               printk(KERN_NOTICE "md/raid10:%s: %s: failing "
+                                      "drive\n",
+                                      mdname(mddev),
+                                      bdevname(rdev->bdev, b));
+                               break;
+                       case 1:
+                               printk(KERN_INFO
+                                      "md/raid10:%s: read error corrected"
+                                      " (%d sectors at %llu on %s)\n",
+                                      mdname(mddev), s,
+                                      (unsigned long long)(
+                                              sect + rdev->data_offset),
+                                      bdevname(rdev->bdev, b));
+                               atomic_add(s, &rdev->corrected_errors);
                        }
+
+                       rdev_dec_pending(rdev, mddev);
+                       rcu_read_lock();
                }
                rcu_read_unlock();
 
@@ -1598,21 +1963,254 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
        }
 }
 
+static void bi_complete(struct bio *bio, int error)
+{
+       complete((struct completion *)bio->bi_private); /* wake submit_bio_wait() */
+}
+
+static int submit_bio_wait(int rw, struct bio *bio)
+{
+       struct completion event;
+       rw |= REQ_SYNC; /* synchronous: hint the block layer not to delay it */
+
+       init_completion(&event);
+       bio->bi_private = &event;
+       bio->bi_end_io = bi_complete;
+       submit_bio(rw, bio);
+       wait_for_completion(&event); /* block until bi_complete() fires */
+
+       return test_bit(BIO_UPTODATE, &bio->bi_flags);
+}
+
+static int narrow_write_error(r10bio_t *r10_bio, int i)
+{
+       struct bio *bio = r10_bio->master_bio;
+       mddev_t *mddev = r10_bio->mddev;
+       conf_t *conf = mddev->private;
+       mdk_rdev_t *rdev = conf->mirrors[r10_bio->devs[i].devnum].rdev;
+       /* bio has the data to be written to slot 'i' where
+        * we just recently had a write error.
+        * We repeatedly clone the bio and trim down to one block,
+        * then try the write.  Where the write fails we record
+        * a bad block.
+        * It is conceivable that the bio doesn't exactly align with
+        * blocks.  We must handle this.
+        *
+        * We currently own a reference to the rdev.
+        */
+
+       int block_sectors;
+       sector_t sector;
+       int sectors;
+       int sect_to_write = r10_bio->sectors;
+       int ok = 1;
+
+       if (rdev->badblocks.shift < 0)
+               return 0; /* bad-block recording disabled on this rdev */
+
+       block_sectors = 1 << rdev->badblocks.shift;
+       sector = r10_bio->sector;
+       sectors = ((r10_bio->sector + block_sectors)
+                  & ~(sector_t)(block_sectors - 1))
+               - sector; /* first chunk may be short, to align to block_sectors */
+
+       while (sect_to_write) {
+               struct bio *wbio;
+               if (sectors > sect_to_write)
+                       sectors = sect_to_write;
+               /* Write at 'sector' for 'sectors' */
+               wbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+               md_trim_bio(wbio, sector - bio->bi_sector, sectors);
+               wbio->bi_sector = (r10_bio->devs[i].addr+
+                                  rdev->data_offset+
+                                  (sector - r10_bio->sector));
+               wbio->bi_bdev = rdev->bdev;
+               if (submit_bio_wait(WRITE, wbio) == 0)
+                       /* Failure! */
+                       ok = rdev_set_badblocks(rdev, sector,
+                                               sectors, 0)
+                               && ok;
+
+               bio_put(wbio);
+               sect_to_write -= sectors;
+               sector += sectors;
+               sectors = block_sectors;
+       }
+       return ok;
+}
+
+static void handle_read_error(mddev_t *mddev, r10bio_t *r10_bio)
+{
+       int slot = r10_bio->read_slot;
+       int mirror = r10_bio->devs[slot].devnum;
+       struct bio *bio;
+       conf_t *conf = mddev->private;
+       mdk_rdev_t *rdev;
+       char b[BDEVNAME_SIZE];
+       unsigned long do_sync;
+       int max_sectors;
+
+       /* we got a read error. Maybe the drive is bad.  Maybe just
+        * the block and we can fix it.
+        * We freeze all other IO, and try reading the block from
+        * other devices.  When we find one, we re-write
+        * and check it that fixes the read error.
+        * This is all done synchronously while the array is
+        * frozen.
+        */
+       if (mddev->ro == 0) {
+               freeze_array(conf);
+               fix_read_error(conf, mddev, r10_bio);
+               unfreeze_array(conf);
+       }
+       rdev_dec_pending(conf->mirrors[mirror].rdev, mddev);
+
+       bio = r10_bio->devs[slot].bio;
+       bdevname(bio->bi_bdev, b);
+       r10_bio->devs[slot].bio =
+               mddev->ro ? IO_BLOCKED : NULL;
+read_more:
+       mirror = read_balance(conf, r10_bio, &max_sectors);
+       if (mirror == -1) {
+               printk(KERN_ALERT "md/raid10:%s: %s: unrecoverable I/O"
+                      " read error for block %llu\n",
+                      mdname(mddev), b,
+                      (unsigned long long)r10_bio->sector);
+               raid_end_bio_io(r10_bio);
+               bio_put(bio);
+               return;
+       }
+
+       do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC);
+       if (bio)
+               bio_put(bio);
+       slot = r10_bio->read_slot;
+       rdev = conf->mirrors[mirror].rdev;
+       printk_ratelimited(
+               KERN_ERR
+               "md/raid10:%s: %s: redirecting "
+               "sector %llu to another mirror\n",
+               mdname(mddev),
+               bdevname(rdev->bdev, b),
+               (unsigned long long)r10_bio->sector);
+       bio = bio_clone_mddev(r10_bio->master_bio,
+                             GFP_NOIO, mddev);
+       md_trim_bio(bio,
+                   r10_bio->sector - bio->bi_sector,
+                   max_sectors);
+       r10_bio->devs[slot].bio = bio;
+       bio->bi_sector = r10_bio->devs[slot].addr
+               + rdev->data_offset;
+       bio->bi_bdev = rdev->bdev;
+       bio->bi_rw = READ | do_sync;
+       bio->bi_private = r10_bio;
+       bio->bi_end_io = raid10_end_read_request;
+       if (max_sectors < r10_bio->sectors) {
+               /* Drat - have to split this up more */
+               struct bio *mbio = r10_bio->master_bio;
+               int sectors_handled =
+                       r10_bio->sector + max_sectors
+                       - mbio->bi_sector;
+               r10_bio->sectors = max_sectors;
+               spin_lock_irq(&conf->device_lock);
+               if (mbio->bi_phys_segments == 0)
+                       mbio->bi_phys_segments = 2;
+               else
+                       mbio->bi_phys_segments++;
+               spin_unlock_irq(&conf->device_lock);
+               generic_make_request(bio);
+               bio = NULL;
+
+               r10_bio = mempool_alloc(conf->r10bio_pool,
+                                       GFP_NOIO);
+               r10_bio->master_bio = mbio;
+               r10_bio->sectors = (mbio->bi_size >> 9)
+                       - sectors_handled;
+               r10_bio->state = 0;
+               set_bit(R10BIO_ReadError,
+                       &r10_bio->state);
+               r10_bio->mddev = mddev;
+               r10_bio->sector = mbio->bi_sector
+                       + sectors_handled;
+
+               goto read_more;
+       } else
+               generic_make_request(bio);
+}
+
+static void handle_write_completed(conf_t *conf, r10bio_t *r10_bio)
+{
+       /* Some sort of write request has finished and it
+        * succeeded in writing where we thought there was a
+        * bad block.  So forget the bad block.
+        * Or possibly if failed and we need to record
+        * a bad block.
+        */
+       int m;
+       mdk_rdev_t *rdev;
+
+       if (test_bit(R10BIO_IsSync, &r10_bio->state) ||
+           test_bit(R10BIO_IsRecover, &r10_bio->state)) {
+               for (m = 0; m < conf->copies; m++) {
+                       int dev = r10_bio->devs[m].devnum;
+                       rdev = conf->mirrors[dev].rdev;
+                       if (r10_bio->devs[m].bio == NULL)
+                               continue;
+                       if (test_bit(BIO_UPTODATE,
+                                    &r10_bio->devs[m].bio->bi_flags)) {
+                               rdev_clear_badblocks(
+                                       rdev,
+                                       r10_bio->devs[m].addr,
+                                       r10_bio->sectors); /* write succeeded: forget the bad block */
+                       } else {
+                               if (!rdev_set_badblocks(
+                                           rdev,
+                                           r10_bio->devs[m].addr,
+                                           r10_bio->sectors, 0))
+                                       md_error(conf->mddev, rdev); /* cannot record: fail the device */
+                       }
+               }
+               put_buf(r10_bio);
+       } else {
+               for (m = 0; m < conf->copies; m++) {
+                       int dev = r10_bio->devs[m].devnum;
+                       struct bio *bio = r10_bio->devs[m].bio;
+                       rdev = conf->mirrors[dev].rdev;
+                       if (bio == IO_MADE_GOOD) {
+                               rdev_clear_badblocks(
+                                       rdev,
+                                       r10_bio->devs[m].addr,
+                                       r10_bio->sectors);
+                               rdev_dec_pending(rdev, conf->mddev);
+                       } else if (bio != NULL &&
+                                  !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+                               if (!narrow_write_error(r10_bio, m)) {
+                                       md_error(conf->mddev, rdev);
+                                       set_bit(R10BIO_Degraded,
+                                               &r10_bio->state);
+                               }
+                               rdev_dec_pending(rdev, conf->mddev);
+                       }
+               }
+               if (test_bit(R10BIO_WriteError,
+                            &r10_bio->state))
+                       close_write(r10_bio);
+               raid_end_bio_io(r10_bio);
+       }
+}
+
 static void raid10d(mddev_t *mddev)
 {
        r10bio_t *r10_bio;
-       struct bio *bio;
        unsigned long flags;
        conf_t *conf = mddev->private;
        struct list_head *head = &conf->retry_list;
-       mdk_rdev_t *rdev;
        struct blk_plug plug;
 
        md_check_recovery(mddev);
 
        blk_start_plug(&plug);
        for (;;) {
-               char b[BDEVNAME_SIZE];
 
                flush_pending_writes(conf);
 
@@ -1628,64 +2226,26 @@ static void raid10d(mddev_t *mddev)
 
                mddev = r10_bio->mddev;
                conf = mddev->private;
-               if (test_bit(R10BIO_IsSync, &r10_bio->state))
+               if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
+                   test_bit(R10BIO_WriteError, &r10_bio->state))
+                       handle_write_completed(conf, r10_bio);
+               else if (test_bit(R10BIO_IsSync, &r10_bio->state))
                        sync_request_write(mddev, r10_bio);
                else if (test_bit(R10BIO_IsRecover, &r10_bio->state))
                        recovery_request_write(mddev, r10_bio);
+               else if (test_bit(R10BIO_ReadError, &r10_bio->state))
+                       handle_read_error(mddev, r10_bio);
                else {
-                       int slot = r10_bio->read_slot;
-                       int mirror = r10_bio->devs[slot].devnum;
-                       /* we got a read error. Maybe the drive is bad.  Maybe just
-                        * the block and we can fix it.
-                        * We freeze all other IO, and try reading the block from
-                        * other devices.  When we find one, we re-write
-                        * and check it that fixes the read error.
-                        * This is all done synchronously while the array is
-                        * frozen.
+                       /* just a partial read to be scheduled from a
+                        * separate context
                         */
-                       if (mddev->ro == 0) {
-                               freeze_array(conf);
-                               fix_read_error(conf, mddev, r10_bio);
-                               unfreeze_array(conf);
-                       }
-                       rdev_dec_pending(conf->mirrors[mirror].rdev, mddev);
-
-                       bio = r10_bio->devs[slot].bio;
-                       r10_bio->devs[slot].bio =
-                               mddev->ro ? IO_BLOCKED : NULL;
-                       mirror = read_balance(conf, r10_bio);
-                       if (mirror == -1) {
-                               printk(KERN_ALERT "md/raid10:%s: %s: unrecoverable I/O"
-                                      " read error for block %llu\n",
-                                      mdname(mddev),
-                                      bdevname(bio->bi_bdev,b),
-                                      (unsigned long long)r10_bio->sector);
-                               raid_end_bio_io(r10_bio);
-                               bio_put(bio);
-                       } else {
-                               const unsigned long do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC);
-                               bio_put(bio);
-                               slot = r10_bio->read_slot;
-                               rdev = conf->mirrors[mirror].rdev;
-                               if (printk_ratelimit())
-                                       printk(KERN_ERR "md/raid10:%s: %s: redirecting sector %llu to"
-                                              " another mirror\n",
-                                              mdname(mddev),
-                                              bdevname(rdev->bdev,b),
-                                              (unsigned long long)r10_bio->sector);
-                               bio = bio_clone_mddev(r10_bio->master_bio,
-                                                     GFP_NOIO, mddev);
-                               r10_bio->devs[slot].bio = bio;
-                               bio->bi_sector = r10_bio->devs[slot].addr
-                                       + rdev->data_offset;
-                               bio->bi_bdev = rdev->bdev;
-                               bio->bi_rw = READ | do_sync;
-                               bio->bi_private = r10_bio;
-                               bio->bi_end_io = raid10_end_read_request;
-                               generic_make_request(bio);
-                       }
+                       int slot = r10_bio->read_slot;
+                       generic_make_request(r10_bio->devs[slot].bio);
                }
+
                cond_resched();
+               if (mddev->flags & ~(1<<MD_CHANGE_PENDING))
+                       md_check_recovery(mddev);
        }
        blk_finish_plug(&plug);
 }
@@ -1746,7 +2306,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr,
        int i;
        int max_sync;
        sector_t sync_blocks;
-
        sector_t sectors_skipped = 0;
        int chunks_skipped = 0;
 
@@ -1828,7 +2387,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr,
        max_sync = RESYNC_PAGES << (PAGE_SHIFT-9);
        if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
                /* recovery... the complicated one */
-               int j, k;
+               int j;
                r10_bio = NULL;
 
                for (i=0 ; i<conf->raid_disks; i++) {
@@ -1836,6 +2395,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr,
                        r10bio_t *rb2;
                        sector_t sect;
                        int must_sync;
+                       int any_working;
 
                        if (conf->mirrors[i].rdev == NULL ||
                            test_bit(In_sync, &conf->mirrors[i].rdev->flags)) 
@@ -1887,19 +2447,42 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr,
                        must_sync = bitmap_start_sync(mddev->bitmap, sect,
                                                      &sync_blocks, still_degraded);
 
+                       any_working = 0;
                        for (j=0; j<conf->copies;j++) {
+                               int k;
                                int d = r10_bio->devs[j].devnum;
+                               sector_t from_addr, to_addr;
+                               mdk_rdev_t *rdev;
+                               sector_t sector, first_bad;
+                               int bad_sectors;
                                if (!conf->mirrors[d].rdev ||
                                    !test_bit(In_sync, &conf->mirrors[d].rdev->flags))
                                        continue;
                                /* This is where we read from */
+                               any_working = 1;
+                               rdev = conf->mirrors[d].rdev;
+                               sector = r10_bio->devs[j].addr;
+
+                               if (is_badblock(rdev, sector, max_sync,
+                                               &first_bad, &bad_sectors)) {
+                                       if (first_bad > sector)
+                                               max_sync = first_bad - sector;
+                                       else {
+                                               bad_sectors -= (sector
+                                                               - first_bad);
+                                               if (max_sync > bad_sectors)
+                                                       max_sync = bad_sectors;
+                                               continue;
+                                       }
+                               }
                                bio = r10_bio->devs[0].bio;
                                bio->bi_next = biolist;
                                biolist = bio;
                                bio->bi_private = r10_bio;
                                bio->bi_end_io = end_sync_read;
                                bio->bi_rw = READ;
-                               bio->bi_sector = r10_bio->devs[j].addr +
+                               from_addr = r10_bio->devs[j].addr;
+                               bio->bi_sector = from_addr +
                                        conf->mirrors[d].rdev->data_offset;
                                bio->bi_bdev = conf->mirrors[d].rdev->bdev;
                                atomic_inc(&conf->mirrors[d].rdev->nr_pending);
@@ -1916,26 +2499,48 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr,
                                bio->bi_private = r10_bio;
                                bio->bi_end_io = end_sync_write;
                                bio->bi_rw = WRITE;
-                               bio->bi_sector = r10_bio->devs[k].addr +
+                               to_addr = r10_bio->devs[k].addr;
+                               bio->bi_sector = to_addr +
                                        conf->mirrors[i].rdev->data_offset;
                                bio->bi_bdev = conf->mirrors[i].rdev->bdev;
 
                                r10_bio->devs[0].devnum = d;
+                               r10_bio->devs[0].addr = from_addr;
                                r10_bio->devs[1].devnum = i;
+                               r10_bio->devs[1].addr = to_addr;
 
                                break;
                        }
                        if (j == conf->copies) {
-                               /* Cannot recover, so abort the recovery */
+                               /* Cannot recover, so abort the recovery or
+                                * record a bad block */
                                put_buf(r10_bio);
                                if (rb2)
                                        atomic_dec(&rb2->remaining);
                                r10_bio = rb2;
-                               if (!test_and_set_bit(MD_RECOVERY_INTR,
-                                                     &mddev->recovery))
-                                       printk(KERN_INFO "md/raid10:%s: insufficient "
-                                              "working devices for recovery.\n",
-                                              mdname(mddev));
+                               if (any_working) {
+                                       /* problem is that there are bad blocks
+                                        * on other device(s)
+                                        */
+                                       int k;
+                                       for (k = 0; k < conf->copies; k++)
+                                               if (r10_bio->devs[k].devnum == i)
+                                                       break;
+                                       if (!rdev_set_badblocks(
+                                                   conf->mirrors[i].rdev,
+                                                   r10_bio->devs[k].addr,
+                                                   max_sync, 0))
+                                               any_working = 0;
+                               }
+                               if (!any_working)  {
+                                       if (!test_and_set_bit(MD_RECOVERY_INTR,
+                                                             &mddev->recovery))
+                                               printk(KERN_INFO "md/raid10:%s: insufficient "
+                                                      "working devices for recovery.\n",
+                                                      mdname(mddev));
+                                       conf->mirrors[i].recovery_disabled
+                                               = mddev->recovery_disabled;
+                               }
                                break;
                        }
                }
@@ -1979,12 +2584,28 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr,
 
                for (i=0; i<conf->copies; i++) {
                        int d = r10_bio->devs[i].devnum;
+                       sector_t first_bad, sector;
+                       int bad_sectors;
+
                        bio = r10_bio->devs[i].bio;
                        bio->bi_end_io = NULL;
                        clear_bit(BIO_UPTODATE, &bio->bi_flags);
                        if (conf->mirrors[d].rdev == NULL ||
                            test_bit(Faulty, &conf->mirrors[d].rdev->flags))
                                continue;
+                       sector = r10_bio->devs[i].addr;
+                       if (is_badblock(conf->mirrors[d].rdev,
+                                       sector, max_sync,
+                                       &first_bad, &bad_sectors)) {
+                               if (first_bad > sector)
+                                       max_sync = first_bad - sector;
+                               else {
+                                       bad_sectors -= (sector - first_bad);
+                                       if (max_sync > bad_sectors)
+                                                       max_sync = bad_sectors;
+                                       continue;
+                               }
+                       }
                        atomic_inc(&conf->mirrors[d].rdev->nr_pending);
                        atomic_inc(&r10_bio->remaining);
                        bio->bi_next = biolist;
@@ -1992,7 +2613,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr,
                        bio->bi_private = r10_bio;
                        bio->bi_end_io = end_sync_read;
                        bio->bi_rw = READ;
-                       bio->bi_sector = r10_bio->devs[i].addr +
+                       bio->bi_sector = sector +
                                conf->mirrors[d].rdev->data_offset;
                        bio->bi_bdev = conf->mirrors[d].rdev->bdev;
                        count++;
@@ -2079,7 +2700,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr,
        return sectors_skipped + nr_sectors;
  giveup:
        /* There is nowhere to write, so all non-sync
-        * drives must be failed, so try the next chunk...
+        * drives must be failed or in resync, all drives
+        * have a bad block, so try the next chunk...
         */
        if (sector_nr + max_sync < max_sector)
                max_sector = sector_nr + max_sync;
@@ -2249,6 +2871,7 @@ static int run(mddev_t *mddev)
                                 (conf->raid_disks / conf->near_copies));
 
        list_for_each_entry(rdev, &mddev->disks, same_set) {
+
                disk_idx = rdev->raid_disk;
                if (disk_idx >= conf->raid_disks
                    || disk_idx < 0)
@@ -2271,7 +2894,7 @@ static int run(mddev_t *mddev)
                disk->head_position = 0;
        }
        /* need to check that every block has at least one working mirror */
-       if (!enough(conf)) {
+       if (!enough(conf, -1)) {
                printk(KERN_ERR "md/raid10:%s: not enough operational mirrors.\n",
                       mdname(mddev));
                goto out_free_conf;
index 944b110..79cb52a 100644 (file)
@@ -6,6 +6,11 @@ typedef struct mirror_info mirror_info_t;
 struct mirror_info {
        mdk_rdev_t      *rdev;
        sector_t        head_position;
+       int             recovery_disabled;      /* matches
+                                                * mddev->recovery_disabled
+                                                * when we shouldn't try
+                                                * recovering this device.
+                                                */
 };
 
 typedef struct r10bio_s r10bio_t;
@@ -113,10 +118,26 @@ struct r10bio_s {
  * level, we store IO_BLOCKED in the appropriate 'bios' pointer
  */
 #define IO_BLOCKED ((struct bio*)1)
+/* When we successfully write to a known bad-block, we need to remove the
+ * bad-block marking which must be done from process context.  So we record
+ * the success by setting devs[n].bio to IO_MADE_GOOD
+ */
+#define IO_MADE_GOOD ((struct bio *)2)
+
+#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
 
 /* bits for r10bio.state */
 #define        R10BIO_Uptodate 0
 #define        R10BIO_IsSync   1
 #define        R10BIO_IsRecover 2
 #define        R10BIO_Degraded 3
+/* Set ReadError on bios that experience a read error
+ * so that raid10d knows what to do with them.
+ */
+#define        R10BIO_ReadError 4
+/* If a write for this request means we can clear some
+ * known-bad-block records, we set this flag.
+ */
+#define        R10BIO_MadeGood 5
+#define        R10BIO_WriteError 6
 #endif
index b72edf3..dbae459 100644 (file)
@@ -51,6 +51,7 @@
 #include <linux/seq_file.h>
 #include <linux/cpu.h>
 #include <linux/slab.h>
+#include <linux/ratelimit.h>
 #include "md.h"
 #include "raid5.h"
 #include "raid0.h"
@@ -96,8 +97,6 @@
 #define __inline__
 #endif
 
-#define printk_rl(args...) ((void) (printk_ratelimit() && printk(args)))
-
 /*
  * We maintain a biased count of active stripes in the bottom 16 bits of
  * bi_phys_segments, and a count of processed stripes in the upper 16 bits
@@ -341,7 +340,7 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
                               (unsigned long long)sh->sector, i, dev->toread,
                               dev->read, dev->towrite, dev->written,
                               test_bit(R5_LOCKED, &dev->flags));
-                       BUG();
+                       WARN_ON(1);
                }
                dev->flags = 0;
                raid5_build_block(sh, i, previous);
@@ -527,6 +526,36 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                        atomic_inc(&rdev->nr_pending);
                rcu_read_unlock();
 
+               /* We have already checked bad blocks for reads.  Now
+                * need to check for writes.
+                */
+               while ((rw & WRITE) && rdev &&
+                      test_bit(WriteErrorSeen, &rdev->flags)) {
+                       sector_t first_bad;
+                       int bad_sectors;
+                       int bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS,
+                                             &first_bad, &bad_sectors);
+                       if (!bad)
+                               break;
+
+                       if (bad < 0) {
+                               set_bit(BlockedBadBlocks, &rdev->flags);
+                               if (!conf->mddev->external &&
+                                   conf->mddev->flags) {
+                                       /* It is very unlikely, but we might
+                                        * still need to write out the
+                                        * bad block log - better give it
+                                        * a chance*/
+                                       md_check_recovery(conf->mddev);
+                               }
+                               md_wait_for_blocked_rdev(rdev, conf->mddev);
+                       } else {
+                               /* Acknowledged bad block - skip the write */
+                               rdev_dec_pending(rdev, conf->mddev);
+                               rdev = NULL;
+                       }
+               }
+
                if (rdev) {
                        if (s->syncing || s->expanding || s->expanded)
                                md_sync_acct(rdev->bdev, STRIPE_SECTORS);
@@ -548,10 +577,6 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                        bi->bi_io_vec[0].bv_offset = 0;
                        bi->bi_size = STRIPE_SIZE;
                        bi->bi_next = NULL;
-                       if ((rw & WRITE) &&
-                           test_bit(R5_ReWrite, &sh->dev[i].flags))
-                               atomic_add(STRIPE_SECTORS,
-                                       &rdev->corrected_errors);
                        generic_make_request(bi);
                } else {
                        if (rw & WRITE)
@@ -1020,12 +1045,12 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
                if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) {
                        struct bio *wbi;
 
-                       spin_lock(&sh->lock);
+                       spin_lock_irq(&sh->raid_conf->device_lock);
                        chosen = dev->towrite;
                        dev->towrite = NULL;
                        BUG_ON(dev->written);
                        wbi = dev->written = chosen;
-                       spin_unlock(&sh->lock);
+                       spin_unlock_irq(&sh->raid_conf->device_lock);
 
                        while (wbi && wbi->bi_sector <
                                dev->sector + STRIPE_SECTORS) {
@@ -1315,12 +1340,11 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
 static int grow_one_stripe(raid5_conf_t *conf)
 {
        struct stripe_head *sh;
-       sh = kmem_cache_alloc(conf->slab_cache, GFP_KERNEL);
+       sh = kmem_cache_zalloc(conf->slab_cache, GFP_KERNEL);
        if (!sh)
                return 0;
-       memset(sh, 0, sizeof(*sh) + (conf->pool_size-1)*sizeof(struct r5dev));
+
        sh->raid_conf = conf;
-       spin_lock_init(&sh->lock);
        #ifdef CONFIG_MULTICORE_RAID456
        init_waitqueue_head(&sh->ops.wait_for_ops);
        #endif
@@ -1435,14 +1459,11 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
                return -ENOMEM;
 
        for (i = conf->max_nr_stripes; i; i--) {
-               nsh = kmem_cache_alloc(sc, GFP_KERNEL);
+               nsh = kmem_cache_zalloc(sc, GFP_KERNEL);
                if (!nsh)
                        break;
 
-               memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev));
-
                nsh->raid_conf = conf;
-               spin_lock_init(&nsh->lock);
                #ifdef CONFIG_MULTICORE_RAID456
                init_waitqueue_head(&nsh->ops.wait_for_ops);
                #endif
@@ -1587,12 +1608,15 @@ static void raid5_end_read_request(struct bio * bi, int error)
                set_bit(R5_UPTODATE, &sh->dev[i].flags);
                if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
                        rdev = conf->disks[i].rdev;
-                       printk_rl(KERN_INFO "md/raid:%s: read error corrected"
-                                 " (%lu sectors at %llu on %s)\n",
-                                 mdname(conf->mddev), STRIPE_SECTORS,
-                                 (unsigned long long)(sh->sector
-                                                      + rdev->data_offset),
-                                 bdevname(rdev->bdev, b));
+                       printk_ratelimited(
+                               KERN_INFO
+                               "md/raid:%s: read error corrected"
+                               " (%lu sectors at %llu on %s)\n",
+                               mdname(conf->mddev), STRIPE_SECTORS,
+                               (unsigned long long)(sh->sector
+                                                    + rdev->data_offset),
+                               bdevname(rdev->bdev, b));
+                       atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
                        clear_bit(R5_ReadError, &sh->dev[i].flags);
                        clear_bit(R5_ReWrite, &sh->dev[i].flags);
                }
@@ -1606,22 +1630,24 @@ static void raid5_end_read_request(struct bio * bi, int error)
                clear_bit(R5_UPTODATE, &sh->dev[i].flags);
                atomic_inc(&rdev->read_errors);
                if (conf->mddev->degraded >= conf->max_degraded)
-                       printk_rl(KERN_WARNING
-                                 "md/raid:%s: read error not correctable "
-                                 "(sector %llu on %s).\n",
-                                 mdname(conf->mddev),
-                                 (unsigned long long)(sh->sector
-                                                      + rdev->data_offset),
-                                 bdn);
+                       printk_ratelimited(
+                               KERN_WARNING
+                               "md/raid:%s: read error not correctable "
+                               "(sector %llu on %s).\n",
+                               mdname(conf->mddev),
+                               (unsigned long long)(sh->sector
+                                                    + rdev->data_offset),
+                               bdn);
                else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
                        /* Oh, no!!! */
-                       printk_rl(KERN_WARNING
-                                 "md/raid:%s: read error NOT corrected!! "
-                                 "(sector %llu on %s).\n",
-                                 mdname(conf->mddev),
-                                 (unsigned long long)(sh->sector
-                                                      + rdev->data_offset),
-                                 bdn);
+                       printk_ratelimited(
+                               KERN_WARNING
+                               "md/raid:%s: read error NOT corrected!! "
+                               "(sector %llu on %s).\n",
+                               mdname(conf->mddev),
+                               (unsigned long long)(sh->sector
+                                                    + rdev->data_offset),
+                               bdn);
                else if (atomic_read(&rdev->read_errors)
                         > conf->max_nr_stripes)
                        printk(KERN_WARNING
@@ -1649,6 +1675,8 @@ static void raid5_end_write_request(struct bio *bi, int error)
        raid5_conf_t *conf = sh->raid_conf;
        int disks = sh->disks, i;
        int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
+       sector_t first_bad;
+       int bad_sectors;
 
        for (i=0 ; i<disks; i++)
                if (bi == &sh->dev[i].req)
@@ -1662,8 +1690,12 @@ static void raid5_end_write_request(struct bio *bi, int error)
                return;
        }
 
-       if (!uptodate)
-               md_error(conf->mddev, conf->disks[i].rdev);
+       if (!uptodate) {
+               set_bit(WriteErrorSeen, &conf->disks[i].rdev->flags);
+               set_bit(R5_WriteError, &sh->dev[i].flags);
+       } else if (is_badblock(conf->disks[i].rdev, sh->sector, STRIPE_SECTORS,
+                              &first_bad, &bad_sectors))
+               set_bit(R5_MadeGood, &sh->dev[i].flags);
 
        rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
        
@@ -1710,6 +1742,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
                 */
                set_bit(MD_RECOVERY_INTR, &mddev->recovery);
        }
+       set_bit(Blocked, &rdev->flags);
        set_bit(Faulty, &rdev->flags);
        set_bit(MD_CHANGE_DEVS, &mddev->flags);
        printk(KERN_ALERT
@@ -1760,7 +1793,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
        /*
         * Select the parity disk based on the user selected algorithm.
         */
-       pd_idx = qd_idx = ~0;
+       pd_idx = qd_idx = -1;
        switch(conf->level) {
        case 4:
                pd_idx = data_disks;
@@ -2143,12 +2176,11 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
        raid5_conf_t *conf = sh->raid_conf;
        int firstwrite=0;
 
-       pr_debug("adding bh b#%llu to stripe s#%llu\n",
+       pr_debug("adding bi b#%llu to stripe s#%llu\n",
                (unsigned long long)bi->bi_sector,
                (unsigned long long)sh->sector);
 
 
-       spin_lock(&sh->lock);
        spin_lock_irq(&conf->device_lock);
        if (forwrite) {
                bip = &sh->dev[dd_idx].towrite;
@@ -2169,19 +2201,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
                bi->bi_next = *bip;
        *bip = bi;
        bi->bi_phys_segments++;
-       spin_unlock_irq(&conf->device_lock);
-       spin_unlock(&sh->lock);
-
-       pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
-               (unsigned long long)bi->bi_sector,
-               (unsigned long long)sh->sector, dd_idx);
-
-       if (conf->mddev->bitmap && firstwrite) {
-               bitmap_startwrite(conf->mddev->bitmap, sh->sector,
-                                 STRIPE_SECTORS, 0);
-               sh->bm_seq = conf->seq_flush+1;
-               set_bit(STRIPE_BIT_DELAY, &sh->state);
-       }
 
        if (forwrite) {
                /* check if page is covered */
@@ -2196,12 +2215,23 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
                if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS)
                        set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
        }
+       spin_unlock_irq(&conf->device_lock);
+
+       pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
+               (unsigned long long)(*bip)->bi_sector,
+               (unsigned long long)sh->sector, dd_idx);
+
+       if (conf->mddev->bitmap && firstwrite) {
+               bitmap_startwrite(conf->mddev->bitmap, sh->sector,
+                                 STRIPE_SECTORS, 0);
+               sh->bm_seq = conf->seq_flush+1;
+               set_bit(STRIPE_BIT_DELAY, &sh->state);
+       }
        return 1;
 
  overlap:
        set_bit(R5_Overlap, &sh->dev[dd_idx].flags);
        spin_unlock_irq(&conf->device_lock);
-       spin_unlock(&sh->lock);
        return 0;
 }
 
@@ -2238,9 +2268,18 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh,
                        rcu_read_lock();
                        rdev = rcu_dereference(conf->disks[i].rdev);
                        if (rdev && test_bit(In_sync, &rdev->flags))
-                               /* multiple read failures in one stripe */
-                               md_error(conf->mddev, rdev);
+                               atomic_inc(&rdev->nr_pending);
+                       else
+                               rdev = NULL;
                        rcu_read_unlock();
+                       if (rdev) {
+                               if (!rdev_set_badblocks(
+                                           rdev,
+                                           sh->sector,
+                                           STRIPE_SECTORS, 0))
+                                       md_error(conf->mddev, rdev);
+                               rdev_dec_pending(rdev, conf->mddev);
+                       }
                }
                spin_lock_irq(&conf->device_lock);
                /* fail all writes first */
@@ -2308,6 +2347,10 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh,
                if (bitmap_end)
                        bitmap_endwrite(conf->mddev->bitmap, sh->sector,
                                        STRIPE_SECTORS, 0, 0);
+               /* If we were in the middle of a write the parity block might
+                * still be locked - so just clear all R5_LOCKED flags
+                */
+               clear_bit(R5_LOCKED, &sh->dev[i].flags);
        }
 
        if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
@@ -2315,109 +2358,73 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh,
                        md_wakeup_thread(conf->mddev->thread);
 }
 
-/* fetch_block5 - checks the given member device to see if its data needs
- * to be read or computed to satisfy a request.
- *
- * Returns 1 when no more member devices need to be checked, otherwise returns
- * 0 to tell the loop in handle_stripe_fill5 to continue
- */
-static int fetch_block5(struct stripe_head *sh, struct stripe_head_state *s,
-                       int disk_idx, int disks)
-{
-       struct r5dev *dev = &sh->dev[disk_idx];
-       struct r5dev *failed_dev = &sh->dev[s->failed_num];
-
-       /* is the data in this block needed, and can we get it? */
-       if (!test_bit(R5_LOCKED, &dev->flags) &&
-           !test_bit(R5_UPTODATE, &dev->flags) &&
-           (dev->toread ||
-            (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
-            s->syncing || s->expanding ||
-            (s->failed &&
-             (failed_dev->toread ||
-              (failed_dev->towrite &&
-               !test_bit(R5_OVERWRITE, &failed_dev->flags)))))) {
-               /* We would like to get this block, possibly by computing it,
-                * otherwise read it if the backing disk is insync
-                */
-               if ((s->uptodate == disks - 1) &&
-                   (s->failed && disk_idx == s->failed_num)) {
-                       set_bit(STRIPE_COMPUTE_RUN, &sh->state);
-                       set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
-                       set_bit(R5_Wantcompute, &dev->flags);
-                       sh->ops.target = disk_idx;
-                       sh->ops.target2 = -1;
-                       s->req_compute = 1;
-                       /* Careful: from this point on 'uptodate' is in the eye
-                        * of raid_run_ops which services 'compute' operations
-                        * before writes. R5_Wantcompute flags a block that will
-                        * be R5_UPTODATE by the time it is needed for a
-                        * subsequent operation.
-                        */
-                       s->uptodate++;
-                       return 1; /* uptodate + compute == disks */
-               } else if (test_bit(R5_Insync, &dev->flags)) {
-                       set_bit(R5_LOCKED, &dev->flags);
-                       set_bit(R5_Wantread, &dev->flags);
-                       s->locked++;
-                       pr_debug("Reading block %d (sync=%d)\n", disk_idx,
-                               s->syncing);
-               }
-       }
-
-       return 0;
-}
-
-/**
- * handle_stripe_fill5 - read or compute data to satisfy pending requests.
- */
-static void handle_stripe_fill5(struct stripe_head *sh,
-                       struct stripe_head_state *s, int disks)
+static void
+handle_failed_sync(raid5_conf_t *conf, struct stripe_head *sh,
+                  struct stripe_head_state *s)
 {
+       int abort = 0;
        int i;
 
-       /* look for blocks to read/compute, skip this if a compute
-        * is already in flight, or if the stripe contents are in the
-        * midst of changing due to a write
+       md_done_sync(conf->mddev, STRIPE_SECTORS, 0);
+       clear_bit(STRIPE_SYNCING, &sh->state);
+       s->syncing = 0;
+       /* There is nothing more to do for sync/check/repair.
+        * For recover we need to record a bad block on all
+        * non-sync devices, or abort the recovery
         */
-       if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
-           !sh->reconstruct_state)
-               for (i = disks; i--; )
-                       if (fetch_block5(sh, s, i, disks))
-                               break;
-       set_bit(STRIPE_HANDLE, &sh->state);
+       if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery))
+               return;
+       /* During recovery devices cannot be removed, so locking and
+        * refcounting of rdevs is not needed
+        */
+       for (i = 0; i < conf->raid_disks; i++) {
+               mdk_rdev_t *rdev = conf->disks[i].rdev;
+               if (!rdev
+                   || test_bit(Faulty, &rdev->flags)
+                   || test_bit(In_sync, &rdev->flags))
+                       continue;
+               if (!rdev_set_badblocks(rdev, sh->sector,
+                                       STRIPE_SECTORS, 0))
+                       abort = 1;
+       }
+       if (abort) {
+               conf->recovery_disabled = conf->mddev->recovery_disabled;
+               set_bit(MD_RECOVERY_INTR, &conf->mddev->recovery);
+       }
 }
 
-/* fetch_block6 - checks the given member device to see if its data needs
+/* fetch_block - checks the given member device to see if its data needs
  * to be read or computed to satisfy a request.
  *
  * Returns 1 when no more member devices need to be checked, otherwise returns
- * 0 to tell the loop in handle_stripe_fill6 to continue
+ * 0 to tell the loop in handle_stripe_fill to continue
  */
-static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s,
-                        struct r6_state *r6s, int disk_idx, int disks)
+static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
+                      int disk_idx, int disks)
 {
        struct r5dev *dev = &sh->dev[disk_idx];
-       struct r5dev *fdev[2] = { &sh->dev[r6s->failed_num[0]],
-                                 &sh->dev[r6s->failed_num[1]] };
+       struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]],
+                                 &sh->dev[s->failed_num[1]] };
 
+       /* is the data in this block needed, and can we get it? */
        if (!test_bit(R5_LOCKED, &dev->flags) &&
            !test_bit(R5_UPTODATE, &dev->flags) &&
            (dev->toread ||
             (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
             s->syncing || s->expanding ||
-            (s->failed >= 1 &&
-             (fdev[0]->toread || s->to_write)) ||
-            (s->failed >= 2 &&
-             (fdev[1]->toread || s->to_write)))) {
+            (s->failed >= 1 && fdev[0]->toread) ||
+            (s->failed >= 2 && fdev[1]->toread) ||
+            (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite &&
+             !test_bit(R5_OVERWRITE, &fdev[0]->flags)) ||
+            (sh->raid_conf->level == 6 && s->failed && s->to_write))) {
                /* we would like to get this block, possibly by computing it,
                 * otherwise read it if the backing disk is insync
                 */
                BUG_ON(test_bit(R5_Wantcompute, &dev->flags));
                BUG_ON(test_bit(R5_Wantread, &dev->flags));
                if ((s->uptodate == disks - 1) &&
-                   (s->failed && (disk_idx == r6s->failed_num[0] ||
-                                  disk_idx == r6s->failed_num[1]))) {
+                   (s->failed && (disk_idx == s->failed_num[0] ||
+                                  disk_idx == s->failed_num[1]))) {
                        /* have disk failed, and we're requested to fetch it;
                         * do compute it
                         */
@@ -2429,6 +2436,12 @@ static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s,
                        sh->ops.target = disk_idx;
                        sh->ops.target2 = -1; /* no 2nd target */
                        s->req_compute = 1;
+                       /* Careful: from this point on 'uptodate' is in the eye
+                        * of raid_run_ops which services 'compute' operations
+                        * before writes. R5_Wantcompute flags a block that will
+                        * be R5_UPTODATE by the time it is needed for a
+                        * subsequent operation.
+                        */
                        s->uptodate++;
                        return 1;
                } else if (s->uptodate == disks-2 && s->failed >= 2) {
@@ -2469,11 +2482,11 @@ static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s,
 }
 
 /**
- * handle_stripe_fill6 - read or compute data to satisfy pending requests.
+ * handle_stripe_fill - read or compute data to satisfy pending requests.
  */
-static void handle_stripe_fill6(struct stripe_head *sh,
-                       struct stripe_head_state *s, struct r6_state *r6s,
-                       int disks)
+static void handle_stripe_fill(struct stripe_head *sh,
+                              struct stripe_head_state *s,
+                              int disks)
 {
        int i;
 
@@ -2484,7 +2497,7 @@ static void handle_stripe_fill6(struct stripe_head *sh,
        if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
            !sh->reconstruct_state)
                for (i = disks; i--; )
-                       if (fetch_block6(sh, s, r6s, i, disks))
+                       if (fetch_block(sh, s, i, disks))
                                break;
        set_bit(STRIPE_HANDLE, &sh->state);
 }
@@ -2540,11 +2553,19 @@ static void handle_stripe_clean_event(raid5_conf_t *conf,
                        md_wakeup_thread(conf->mddev->thread);
 }
 
-static void handle_stripe_dirtying5(raid5_conf_t *conf,
-               struct stripe_head *sh, struct stripe_head_state *s, int disks)
+static void handle_stripe_dirtying(raid5_conf_t *conf,
+                                  struct stripe_head *sh,
+                                  struct stripe_head_state *s,
+                                  int disks)
 {
        int rmw = 0, rcw = 0, i;
-       for (i = disks; i--; ) {
+       if (conf->max_degraded == 2) {
+               /* RAID6 requires 'rcw' in current implementation
+                * Calculate the real rcw later - for now fake it
+                * look like rcw is cheaper
+                */
+               rcw = 1; rmw = 2;
+       } else for (i = disks; i--; ) {
                /* would I have to read this buffer for read_modify_write */
                struct r5dev *dev = &sh->dev[i];
                if ((dev->towrite || i == sh->pd_idx) &&
@@ -2591,16 +2612,19 @@ static void handle_stripe_dirtying5(raid5_conf_t *conf,
                                }
                        }
                }
-       if (rcw <= rmw && rcw > 0)
+       if (rcw <= rmw && rcw > 0) {
                /* want reconstruct write, but need to get some data */
+               rcw = 0;
                for (i = disks; i--; ) {
                        struct r5dev *dev = &sh->dev[i];
                        if (!test_bit(R5_OVERWRITE, &dev->flags) &&
-                           i != sh->pd_idx &&
+                           i != sh->pd_idx && i != sh->qd_idx &&
                            !test_bit(R5_LOCKED, &dev->flags) &&
                            !(test_bit(R5_UPTODATE, &dev->flags) ||
-                           test_bit(R5_Wantcompute, &dev->flags)) &&
-                           test_bit(R5_Insync, &dev->flags)) {
+                             test_bit(R5_Wantcompute, &dev->flags))) {
+                               rcw++;
+                               if (!test_bit(R5_Insync, &dev->flags))
+                                       continue; /* it's a failed drive */
                                if (
                                  test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
                                        pr_debug("Read_old block "
@@ -2614,6 +2638,7 @@ static void handle_stripe_dirtying5(raid5_conf_t *conf,
                                }
                        }
                }
+       }
        /* now if nothing is locked, and if we have enough data,
         * we can start a write request
         */
@@ -2630,53 +2655,6 @@ static void handle_stripe_dirtying5(raid5_conf_t *conf,
                schedule_reconstruction(sh, s, rcw == 0, 0);
 }
 
-static void handle_stripe_dirtying6(raid5_conf_t *conf,
-               struct stripe_head *sh, struct stripe_head_state *s,
-               struct r6_state *r6s, int disks)
-{
-       int rcw = 0, pd_idx = sh->pd_idx, i;
-       int qd_idx = sh->qd_idx;
-
-       set_bit(STRIPE_HANDLE, &sh->state);
-       for (i = disks; i--; ) {
-               struct r5dev *dev = &sh->dev[i];
-               /* check if we haven't enough data */
-               if (!test_bit(R5_OVERWRITE, &dev->flags) &&
-                   i != pd_idx && i != qd_idx &&
-                   !test_bit(R5_LOCKED, &dev->flags) &&
-                   !(test_bit(R5_UPTODATE, &dev->flags) ||
-                     test_bit(R5_Wantcompute, &dev->flags))) {
-                       rcw++;
-                       if (!test_bit(R5_Insync, &dev->flags))
-                               continue; /* it's a failed drive */
-
-                       if (
-                         test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
-                               pr_debug("Read_old stripe %llu "
-                                       "block %d for Reconstruct\n",
-                                    (unsigned long long)sh->sector, i);
-                               set_bit(R5_LOCKED, &dev->flags);
-                               set_bit(R5_Wantread, &dev->flags);
-                               s->locked++;
-                       } else {
-                               pr_debug("Request delayed stripe %llu "
-                                       "block %d for Reconstruct\n",
-                                    (unsigned long long)sh->sector, i);
-                               set_bit(STRIPE_DELAYED, &sh->state);
-                               set_bit(STRIPE_HANDLE, &sh->state);
-                       }
-               }
-       }
-       /* now if nothing is locked, and if we have enough data, we can start a
-        * write request
-        */
-       if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
-           s->locked == 0 && rcw == 0 &&
-           !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
-               schedule_reconstruction(sh, s, 1, 0);
-       }
-}
-
 static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
                                struct stripe_head_state *s, int disks)
 {
@@ -2695,7 +2673,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
                        s->uptodate--;
                        break;
                }
-               dev = &sh->dev[s->failed_num];
+               dev = &sh->dev[s->failed_num[0]];
                /* fall through */
        case check_state_compute_result:
                sh->check_state = check_state_idle;
@@ -2767,7 +2745,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
 
 static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
                                  struct stripe_head_state *s,
-                                 struct r6_state *r6s, int disks)
+                                 int disks)
 {
        int pd_idx = sh->pd_idx;
        int qd_idx = sh->qd_idx;
@@ -2786,14 +2764,14 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
        switch (sh->check_state) {
        case check_state_idle:
                /* start a new check operation if there are < 2 failures */
-               if (s->failed == r6s->q_failed) {
+               if (s->failed == s->q_failed) {
                        /* The only possible failed device holds Q, so it
                         * makes sense to check P (If anything else were failed,
                         * we would have used P to recreate it).
                         */
                        sh->check_state = check_state_run;
                }
-               if (!r6s->q_failed && s->failed < 2) {
+               if (!s->q_failed && s->failed < 2) {
                        /* Q is not failed, and we didn't use it to generate
                         * anything, so it makes sense to check it
                         */
@@ -2835,13 +2813,13 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
                 */
                BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */
                if (s->failed == 2) {
-                       dev = &sh->dev[r6s->failed_num[1]];
+                       dev = &sh->dev[s->failed_num[1]];
                        s->locked++;
                        set_bit(R5_LOCKED, &dev->flags);
                        set_bit(R5_Wantwrite, &dev->flags);
                }
                if (s->failed >= 1) {
-                       dev = &sh->dev[r6s->failed_num[0]];
+                       dev = &sh->dev[s->failed_num[0]];
                        s->locked++;
                        set_bit(R5_LOCKED, &dev->flags);
                        set_bit(R5_Wantwrite, &dev->flags);
@@ -2928,8 +2906,7 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
        }
 }
 
-static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
-                               struct r6_state *r6s)
+static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh)
 {
        int i;
 
@@ -2971,7 +2948,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
                        set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
                        for (j = 0; j < conf->raid_disks; j++)
                                if (j != sh2->pd_idx &&
-                                   (!r6s || j != sh2->qd_idx) &&
+                                   j != sh2->qd_idx &&
                                    !test_bit(R5_Expanded, &sh2->dev[j].flags))
                                        break;
                        if (j == conf->raid_disks) {
@@ -3006,43 +2983,35 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
  *
  */
 
-static void handle_stripe5(struct stripe_head *sh)
+static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
 {
        raid5_conf_t *conf = sh->raid_conf;
-       int disks = sh->disks, i;
-       struct bio *return_bi = NULL;
-       struct stripe_head_state s;
+       int disks = sh->disks;
        struct r5dev *dev;
-       mdk_rdev_t *blocked_rdev = NULL;
-       int prexor;
-       int dec_preread_active = 0;
+       int i;
 
-       memset(&s, 0, sizeof(s));
-       pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d check:%d "
-                "reconstruct:%d\n", (unsigned long long)sh->sector, sh->state,
-                atomic_read(&sh->count), sh->pd_idx, sh->check_state,
-                sh->reconstruct_state);
+       memset(s, 0, sizeof(*s));
 
-       spin_lock(&sh->lock);
-       clear_bit(STRIPE_HANDLE, &sh->state);
-       clear_bit(STRIPE_DELAYED, &sh->state);
-
-       s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
-       s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
-       s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
+       s->syncing = test_bit(STRIPE_SYNCING, &sh->state);
+       s->expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
+       s->expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
+       s->failed_num[0] = -1;
+       s->failed_num[1] = -1;
 
        /* Now to look around and see what can be done */
        rcu_read_lock();
+       spin_lock_irq(&conf->device_lock);
        for (i=disks; i--; ) {
                mdk_rdev_t *rdev;
+               sector_t first_bad;
+               int bad_sectors;
+               int is_bad = 0;
 
                dev = &sh->dev[i];
 
-               pr_debug("check %d: state 0x%lx toread %p read %p write %p "
-                       "written %p\n", i, dev->flags, dev->toread, dev->read,
-                       dev->towrite, dev->written);
-
-               /* maybe we can request a biofill operation
+               pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
+                       i, dev->flags, dev->toread, dev->towrite, dev->written);
+               /* maybe we can reply to a read
                 *
                 * new wantfill requests are only permitted while
                 * ops_complete_biofill is guaranteed to be inactive
@@ -3052,37 +3021,74 @@ static void handle_stripe5(struct stripe_head *sh)
                        set_bit(R5_Wantfill, &dev->flags);
 
                /* now count some things */
-               if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
-               if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
-               if (test_bit(R5_Wantcompute, &dev->flags)) s.compute++;
+               if (test_bit(R5_LOCKED, &dev->flags))
+                       s->locked++;
+               if (test_bit(R5_UPTODATE, &dev->flags))
+                       s->uptodate++;
+               if (test_bit(R5_Wantcompute, &dev->flags)) {
+                       s->compute++;
+                       BUG_ON(s->compute > 2);
+               }
 
                if (test_bit(R5_Wantfill, &dev->flags))
-                       s.to_fill++;
+                       s->to_fill++;
                else if (dev->toread)
-                       s.to_read++;
+                       s->to_read++;
                if (dev->towrite) {
-                       s.to_write++;
+                       s->to_write++;
                        if (!test_bit(R5_OVERWRITE, &dev->flags))
-                               s.non_overwrite++;
+                               s->non_overwrite++;
                }
                if (dev->written)
-                       s.written++;
+                       s->written++;
                rdev = rcu_dereference(conf->disks[i].rdev);
-               if (blocked_rdev == NULL &&
-                   rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
-                       blocked_rdev = rdev;
-                       atomic_inc(&rdev->nr_pending);
+               if (rdev) {
+                       is_bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS,
+                                            &first_bad, &bad_sectors);
+                       if (s->blocked_rdev == NULL
+                           && (test_bit(Blocked, &rdev->flags)
+                               || is_bad < 0)) {
+                               if (is_bad < 0)
+                                       set_bit(BlockedBadBlocks,
+                                               &rdev->flags);
+                               s->blocked_rdev = rdev;
+                               atomic_inc(&rdev->nr_pending);
+                       }
                }
                clear_bit(R5_Insync, &dev->flags);
                if (!rdev)
                        /* Not in-sync */;
-               else if (test_bit(In_sync, &rdev->flags))
+               else if (is_bad) {
+                       /* also not in-sync */
+                       if (!test_bit(WriteErrorSeen, &rdev->flags)) {
+                               /* treat as in-sync, but with a read error
+                                * which we can now try to correct
+                                */
+                               set_bit(R5_Insync, &dev->flags);
+                               set_bit(R5_ReadError, &dev->flags);
+                       }
+               } else if (test_bit(In_sync, &rdev->flags))
                        set_bit(R5_Insync, &dev->flags);
                else {
-                       /* could be in-sync depending on recovery/reshape status */
+                       /* in sync if before recovery_offset */
                        if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset)
                                set_bit(R5_Insync, &dev->flags);
                }
+               if (test_bit(R5_WriteError, &dev->flags)) {
+                       clear_bit(R5_Insync, &dev->flags);
+                       if (!test_bit(Faulty, &rdev->flags)) {
+                               s->handle_bad_blocks = 1;
+                               atomic_inc(&rdev->nr_pending);
+                       } else
+                               clear_bit(R5_WriteError, &dev->flags);
+               }
+               if (test_bit(R5_MadeGood, &dev->flags)) {
+                       if (!test_bit(Faulty, &rdev->flags)) {
+                               s->handle_bad_blocks = 1;
+                               atomic_inc(&rdev->nr_pending);
+                       } else
+                               clear_bit(R5_MadeGood, &dev->flags);
+               }
                if (!test_bit(R5_Insync, &dev->flags)) {
                        /* The ReadError flag will just be confusing now */
                        clear_bit(R5_ReadError, &dev->flags);
@@ -3091,313 +3097,60 @@ static void handle_stripe5(struct stripe_head *sh)
                if (test_bit(R5_ReadError, &dev->flags))
                        clear_bit(R5_Insync, &dev->flags);
                if (!test_bit(R5_Insync, &dev->flags)) {
-                       s.failed++;
-                       s.failed_num = i;
+                       if (s->failed < 2)
+                               s->failed_num[s->failed] = i;
+                       s->failed++;
                }
        }
+       spin_unlock_irq(&conf->device_lock);
        rcu_read_unlock();
-
-       if (unlikely(blocked_rdev)) {
-               if (s.syncing || s.expanding || s.expanded ||
-                   s.to_write || s.written) {
-                       set_bit(STRIPE_HANDLE, &sh->state);
-                       goto unlock;
-               }
-               /* There is nothing for the blocked_rdev to block */
-               rdev_dec_pending(blocked_rdev, conf->mddev);
-               blocked_rdev = NULL;
-       }
-
-       if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
-               set_bit(STRIPE_OP_BIOFILL, &s.ops_request);
-               set_bit(STRIPE_BIOFILL_RUN, &sh->state);
-       }
-
-       pr_debug("locked=%d uptodate=%d to_read=%d"
-               " to_write=%d failed=%d failed_num=%d\n",
-               s.locked, s.uptodate, s.to_read, s.to_write,
-               s.failed, s.failed_num);
-       /* check if the array has lost two devices and, if so, some requests might
-        * need to be failed
-        */
-       if (s.failed > 1 && s.to_read+s.to_write+s.written)
-               handle_failed_stripe(conf, sh, &s, disks, &return_bi);
-       if (s.failed > 1 && s.syncing) {
-               md_done_sync(conf->mddev, STRIPE_SECTORS,0);
-               clear_bit(STRIPE_SYNCING, &sh->state);
-               s.syncing = 0;
-       }
-
-       /* might be able to return some write requests if the parity block
-        * is safe, or on a failed drive
-        */
-       dev = &sh->dev[sh->pd_idx];
-       if ( s.written &&
-            ((test_bit(R5_Insync, &dev->flags) &&
-              !test_bit(R5_LOCKED, &dev->flags) &&
-              test_bit(R5_UPTODATE, &dev->flags)) ||
-              (s.failed == 1 && s.failed_num == sh->pd_idx)))
-               handle_stripe_clean_event(conf, sh, disks, &return_bi);
-
-       /* Now we might consider reading some blocks, either to check/generate
-        * parity, or to satisfy requests
-        * or to load a block that is being partially written.
-        */
-       if (s.to_read || s.non_overwrite ||
-           (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
-               handle_stripe_fill5(sh, &s, disks);
-
-       /* Now we check to see if any write operations have recently
-        * completed
-        */
-       prexor = 0;
-       if (sh->reconstruct_state == reconstruct_state_prexor_drain_result)
-               prexor = 1;
-       if (sh->reconstruct_state == reconstruct_state_drain_result ||
-           sh->reconstruct_state == reconstruct_state_prexor_drain_result) {
-               sh->reconstruct_state = reconstruct_state_idle;
-
-               /* All the 'written' buffers and the parity block are ready to
-                * be written back to disk
-                */
-               BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
-               for (i = disks; i--; ) {
-                       dev = &sh->dev[i];
-                       if (test_bit(R5_LOCKED, &dev->flags) &&
-                               (i == sh->pd_idx || dev->written)) {
-                               pr_debug("Writing block %d\n", i);
-                               set_bit(R5_Wantwrite, &dev->flags);
-                               if (prexor)
-                                       continue;
-                               if (!test_bit(R5_Insync, &dev->flags) ||
-                                   (i == sh->pd_idx && s.failed == 0))
-                                       set_bit(STRIPE_INSYNC, &sh->state);
-                       }
-               }
-               if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
-                       dec_preread_active = 1;
-       }
-
-       /* Now to consider new write requests and what else, if anything
-        * should be read.  We do not handle new writes when:
-        * 1/ A 'write' operation (copy+xor) is already in flight.
-        * 2/ A 'check' operation is in flight, as it may clobber the parity
-        *    block.
-        */
-       if (s.to_write && !sh->reconstruct_state && !sh->check_state)
-               handle_stripe_dirtying5(conf, sh, &s, disks);
-
-       /* maybe we need to check and possibly fix the parity for this stripe
-        * Any reads will already have been scheduled, so we just see if enough
-        * data is available.  The parity check is held off while parity
-        * dependent operations are in flight.
-        */
-       if (sh->check_state ||
-           (s.syncing && s.locked == 0 &&
-            !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
-            !test_bit(STRIPE_INSYNC, &sh->state)))
-               handle_parity_checks5(conf, sh, &s, disks);
-
-       if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
-               md_done_sync(conf->mddev, STRIPE_SECTORS,1);
-               clear_bit(STRIPE_SYNCING, &sh->state);
-       }
-
-       /* If the failed drive is just a ReadError, then we might need to progress
-        * the repair/check process
-        */
-       if (s.failed == 1 && !conf->mddev->ro &&
-           test_bit(R5_ReadError, &sh->dev[s.failed_num].flags)
-           && !test_bit(R5_LOCKED, &sh->dev[s.failed_num].flags)
-           && test_bit(R5_UPTODATE, &sh->dev[s.failed_num].flags)
-               ) {
-               dev = &sh->dev[s.failed_num];
-               if (!test_bit(R5_ReWrite, &dev->flags)) {
-                       set_bit(R5_Wantwrite, &dev->flags);
-                       set_bit(R5_ReWrite, &dev->flags);
-                       set_bit(R5_LOCKED, &dev->flags);
-                       s.locked++;
-               } else {
-                       /* let's read it back */
-                       set_bit(R5_Wantread, &dev->flags);
-                       set_bit(R5_LOCKED, &dev->flags);
-                       s.locked++;
-               }
-       }
-
-       /* Finish reconstruct operations initiated by the expansion process */
-       if (sh->reconstruct_state == reconstruct_state_result) {
-               struct stripe_head *sh2
-                       = get_active_stripe(conf, sh->sector, 1, 1, 1);
-               if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
-                       /* sh cannot be written until sh2 has been read.
-                        * so arrange for sh to be delayed a little
-                        */
-                       set_bit(STRIPE_DELAYED, &sh->state);
-                       set_bit(STRIPE_HANDLE, &sh->state);
-                       if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE,
-                                             &sh2->state))
-                               atomic_inc(&conf->preread_active_stripes);
-                       release_stripe(sh2);
-                       goto unlock;
-               }
-               if (sh2)
-                       release_stripe(sh2);
-
-               sh->reconstruct_state = reconstruct_state_idle;
-               clear_bit(STRIPE_EXPANDING, &sh->state);
-               for (i = conf->raid_disks; i--; ) {
-                       set_bit(R5_Wantwrite, &sh->dev[i].flags);
-                       set_bit(R5_LOCKED, &sh->dev[i].flags);
-                       s.locked++;
-               }
-       }
-
-       if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
-           !sh->reconstruct_state) {
-               /* Need to write out all blocks after computing parity */
-               sh->disks = conf->raid_disks;
-               stripe_set_idx(sh->sector, conf, 0, sh);
-               schedule_reconstruction(sh, &s, 1, 1);
-       } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
-               clear_bit(STRIPE_EXPAND_READY, &sh->state);
-               atomic_dec(&conf->reshape_stripes);
-               wake_up(&conf->wait_for_overlap);
-               md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
-       }
-
-       if (s.expanding && s.locked == 0 &&
-           !test_bit(STRIPE_COMPUTE_RUN, &sh->state))
-               handle_stripe_expansion(conf, sh, NULL);
-
- unlock:
-       spin_unlock(&sh->lock);
-
-       /* wait for this device to become unblocked */
-       if (unlikely(blocked_rdev))
-               md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
-
-       if (s.ops_request)
-               raid_run_ops(sh, s.ops_request);
-
-       ops_run_io(sh, &s);
-
-       if (dec_preread_active) {
-               /* We delay this until after ops_run_io so that if make_request
-                * is waiting on a flush, it won't continue until the writes
-                * have actually been submitted.
-                */
-               atomic_dec(&conf->preread_active_stripes);
-               if (atomic_read(&conf->preread_active_stripes) <
-                   IO_THRESHOLD)
-                       md_wakeup_thread(conf->mddev->thread);
-       }
-       return_io(return_bi);
 }
 
-static void handle_stripe6(struct stripe_head *sh)
+static void handle_stripe(struct stripe_head *sh)
 {
+       struct stripe_head_state s;
        raid5_conf_t *conf = sh->raid_conf;
+       int i;
+       int prexor;
        int disks = sh->disks;
-       struct bio *return_bi = NULL;
-       int i, pd_idx = sh->pd_idx, qd_idx = sh->qd_idx;
-       struct stripe_head_state s;
-       struct r6_state r6s;
-       struct r5dev *dev, *pdev, *qdev;
-       mdk_rdev_t *blocked_rdev = NULL;
-       int dec_preread_active = 0;
+       struct r5dev *pdev, *qdev;
+
+       clear_bit(STRIPE_HANDLE, &sh->state);
+       if (test_and_set_bit(STRIPE_ACTIVE, &sh->state)) {
+               /* already being handled, ensure it gets handled
+                * again when current action finishes */
+               set_bit(STRIPE_HANDLE, &sh->state);
+               return;
+       }
+
+       if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
+               set_bit(STRIPE_SYNCING, &sh->state);
+               clear_bit(STRIPE_INSYNC, &sh->state);
+       }
+       clear_bit(STRIPE_DELAYED, &sh->state);
 
        pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
                "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
               (unsigned long long)sh->sector, sh->state,
-              atomic_read(&sh->count), pd_idx, qd_idx,
+              atomic_read(&sh->count), sh->pd_idx, sh->qd_idx,
               sh->check_state, sh->reconstruct_state);
-       memset(&s, 0, sizeof(s));
-
-       spin_lock(&sh->lock);
-       clear_bit(STRIPE_HANDLE, &sh->state);
-       clear_bit(STRIPE_DELAYED, &sh->state);
-
-       s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
-       s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
-       s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
-       /* Now to look around and see what can be done */
-
-       rcu_read_lock();
-       for (i=disks; i--; ) {
-               mdk_rdev_t *rdev;
-               dev = &sh->dev[i];
 
-               pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
-                       i, dev->flags, dev->toread, dev->towrite, dev->written);
-               /* maybe we can reply to a read
-                *
-                * new wantfill requests are only permitted while
-                * ops_complete_biofill is guaranteed to be inactive
-                */
-               if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread &&
-                   !test_bit(STRIPE_BIOFILL_RUN, &sh->state))
-                       set_bit(R5_Wantfill, &dev->flags);
+       analyse_stripe(sh, &s);
 
-               /* now count some things */
-               if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
-               if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
-               if (test_bit(R5_Wantcompute, &dev->flags)) {
-                       s.compute++;
-                       BUG_ON(s.compute > 2);
-               }
-
-               if (test_bit(R5_Wantfill, &dev->flags)) {
-                       s.to_fill++;
-               } else if (dev->toread)
-                       s.to_read++;
-               if (dev->towrite) {
-                       s.to_write++;
-                       if (!test_bit(R5_OVERWRITE, &dev->flags))
-                               s.non_overwrite++;
-               }
-               if (dev->written)
-                       s.written++;
-               rdev = rcu_dereference(conf->disks[i].rdev);
-               if (blocked_rdev == NULL &&
-                   rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
-                       blocked_rdev = rdev;
-                       atomic_inc(&rdev->nr_pending);
-               }
-               clear_bit(R5_Insync, &dev->flags);
-               if (!rdev)
-                       /* Not in-sync */;
-               else if (test_bit(In_sync, &rdev->flags))
-                       set_bit(R5_Insync, &dev->flags);
-               else {
-                       /* in sync if before recovery_offset */
-                       if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset)
-                               set_bit(R5_Insync, &dev->flags);
-               }
-               if (!test_bit(R5_Insync, &dev->flags)) {
-                       /* The ReadError flag will just be confusing now */
-                       clear_bit(R5_ReadError, &dev->flags);
-                       clear_bit(R5_ReWrite, &dev->flags);
-               }
-               if (test_bit(R5_ReadError, &dev->flags))
-                       clear_bit(R5_Insync, &dev->flags);
-               if (!test_bit(R5_Insync, &dev->flags)) {
-                       if (s.failed < 2)
-                               r6s.failed_num[s.failed] = i;
-                       s.failed++;
-               }
+       if (s.handle_bad_blocks) {
+               set_bit(STRIPE_HANDLE, &sh->state);
+               goto finish;
        }
-       rcu_read_unlock();
 
-       if (unlikely(blocked_rdev)) {
+       if (unlikely(s.blocked_rdev)) {
                if (s.syncing || s.expanding || s.expanded ||
                    s.to_write || s.written) {
                        set_bit(STRIPE_HANDLE, &sh->state);
-                       goto unlock;
+                       goto finish;
                }
                /* There is nothing for the blocked_rdev to block */
-               rdev_dec_pending(blocked_rdev, conf->mddev);
-               blocked_rdev = NULL;
+               rdev_dec_pending(s.blocked_rdev, conf->mddev);
+               s.blocked_rdev = NULL;
        }
 
        if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
@@ -3408,83 +3161,88 @@ static void handle_stripe6(struct stripe_head *sh)
        pr_debug("locked=%d uptodate=%d to_read=%d"
               " to_write=%d failed=%d failed_num=%d,%d\n",
               s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
-              r6s.failed_num[0], r6s.failed_num[1]);
-       /* check if the array has lost >2 devices and, if so, some requests
-        * might need to be failed
+              s.failed_num[0], s.failed_num[1]);
+       /* check if the array has lost more than max_degraded devices and,
+        * if so, some requests might need to be failed.
         */
-       if (s.failed > 2 && s.to_read+s.to_write+s.written)
-               handle_failed_stripe(conf, sh, &s, disks, &return_bi);
-       if (s.failed > 2 && s.syncing) {
-               md_done_sync(conf->mddev, STRIPE_SECTORS,0);
-               clear_bit(STRIPE_SYNCING, &sh->state);
-               s.syncing = 0;
-       }
+       if (s.failed > conf->max_degraded && s.to_read+s.to_write+s.written)
+               handle_failed_stripe(conf, sh, &s, disks, &s.return_bi);
+       if (s.failed > conf->max_degraded && s.syncing)
+               handle_failed_sync(conf, sh, &s);
 
        /*
         * might be able to return some write requests if the parity blocks
         * are safe, or on a failed drive
         */
-       pdev = &sh->dev[pd_idx];
-       r6s.p_failed = (s.failed >= 1 && r6s.failed_num[0] == pd_idx)
-               || (s.failed >= 2 && r6s.failed_num[1] == pd_idx);
-       qdev = &sh->dev[qd_idx];
-       r6s.q_failed = (s.failed >= 1 && r6s.failed_num[0] == qd_idx)
-               || (s.failed >= 2 && r6s.failed_num[1] == qd_idx);
-
-       if ( s.written &&
-            ( r6s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
+       pdev = &sh->dev[sh->pd_idx];
+       s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx)
+               || (s.failed >= 2 && s.failed_num[1] == sh->pd_idx);
+       qdev = &sh->dev[sh->qd_idx];
+       s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx)
+               || (s.failed >= 2 && s.failed_num[1] == sh->qd_idx)
+               || conf->level < 6;
+
+       if (s.written &&
+           (s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
                             && !test_bit(R5_LOCKED, &pdev->flags)
                             && test_bit(R5_UPTODATE, &pdev->flags)))) &&
-            ( r6s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
+           (s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
                             && !test_bit(R5_LOCKED, &qdev->flags)
                             && test_bit(R5_UPTODATE, &qdev->flags)))))
-               handle_stripe_clean_event(conf, sh, disks, &return_bi);
+               handle_stripe_clean_event(conf, sh, disks, &s.return_bi);
 
        /* Now we might consider reading some blocks, either to check/generate
         * parity, or to satisfy requests
         * or to load a block that is being partially written.
         */
-       if (s.to_read || s.non_overwrite || (s.to_write && s.failed) ||
-           (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
-               handle_stripe_fill6(sh, &s, &r6s, disks);
+       if (s.to_read || s.non_overwrite
+           || (conf->level == 6 && s.to_write && s.failed)
+           || (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
+               handle_stripe_fill(sh, &s, disks);
 
        /* Now we check to see if any write operations have recently
         * completed
         */
-       if (sh->reconstruct_state == reconstruct_state_drain_result) {
-
+       prexor = 0;
+       if (sh->reconstruct_state == reconstruct_state_prexor_drain_result)
+               prexor = 1;
+       if (sh->reconstruct_state == reconstruct_state_drain_result ||
+           sh->reconstruct_state == reconstruct_state_prexor_drain_result) {
                sh->reconstruct_state = reconstruct_state_idle;
-               /* All the 'written' buffers and the parity blocks are ready to
+
+               /* All the 'written' buffers and the parity block are ready to
                 * be written back to disk
                 */
                BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
-               BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags));
+               BUG_ON(sh->qd_idx >= 0 &&
+                      !test_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags));
                for (i = disks; i--; ) {
-                       dev = &sh->dev[i];
+                       struct r5dev *dev = &sh->dev[i];
                        if (test_bit(R5_LOCKED, &dev->flags) &&
-                           (i == sh->pd_idx || i == qd_idx ||
-                            dev->written)) {
+                               (i == sh->pd_idx || i == sh->qd_idx ||
+                                dev->written)) {
                                pr_debug("Writing block %d\n", i);
-                               BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
                                set_bit(R5_Wantwrite, &dev->flags);
+                               if (prexor)
+                                       continue;
                                if (!test_bit(R5_Insync, &dev->flags) ||
-                                   ((i == sh->pd_idx || i == qd_idx) &&
-                                     s.failed == 0))
+                                   ((i == sh->pd_idx || i == sh->qd_idx)  &&
+                                    s.failed == 0))
                                        set_bit(STRIPE_INSYNC, &sh->state);
                        }
                }
                if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
-                       dec_preread_active = 1;
+                       s.dec_preread_active = 1;
        }
 
        /* Now to consider new write requests and what else, if anything
         * should be read.  We do not handle new writes when:
-        * 1/ A 'write' operation (copy+gen_syndrome) is already in flight.
+        * 1/ A 'write' operation (copy+xor) is already in flight.
         * 2/ A 'check' operation is in flight, as it may clobber the parity
         *    block.
         */
        if (s.to_write && !sh->reconstruct_state && !sh->check_state)
-               handle_stripe_dirtying6(conf, sh, &s, &r6s, disks);
+               handle_stripe_dirtying(conf, sh, &s, disks);
 
        /* maybe we need to check and possibly fix the parity for this stripe
         * Any reads will already have been scheduled, so we just see if enough
@@ -3494,20 +3252,24 @@ static void handle_stripe6(struct stripe_head *sh)
        if (sh->check_state ||
            (s.syncing && s.locked == 0 &&
             !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
-            !test_bit(STRIPE_INSYNC, &sh->state)))
-               handle_parity_checks6(conf, sh, &s, &r6s, disks);
+            !test_bit(STRIPE_INSYNC, &sh->state))) {
+               if (conf->level == 6)
+                       handle_parity_checks6(conf, sh, &s, disks);
+               else
+                       handle_parity_checks5(conf, sh, &s, disks);
+       }
 
        if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
-               md_done_sync(conf->mddev, STRIPE_SECTORS,1);
+               md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
                clear_bit(STRIPE_SYNCING, &sh->state);
        }
 
        /* If the failed drives are just a ReadError, then we might need
         * to progress the repair/check process
         */
-       if (s.failed <= 2 && !conf->mddev->ro)
+       if (s.failed <= conf->max_degraded && !conf->mddev->ro)
                for (i = 0; i < s.failed; i++) {
-                       dev = &sh->dev[r6s.failed_num[i]];
+                       struct r5dev *dev = &sh->dev[s.failed_num[i]];
                        if (test_bit(R5_ReadError, &dev->flags)
                            && !test_bit(R5_LOCKED, &dev->flags)
                            && test_bit(R5_UPTODATE, &dev->flags)
@@ -3526,8 +3288,26 @@ static void handle_stripe6(struct stripe_head *sh)
                        }
                }
 
+
        /* Finish reconstruct operations initiated by the expansion process */
        if (sh->reconstruct_state == reconstruct_state_result) {
+               struct stripe_head *sh_src
+                       = get_active_stripe(conf, sh->sector, 1, 1, 1);
+               if (sh_src && test_bit(STRIPE_EXPAND_SOURCE, &sh_src->state)) {
+                       /* sh cannot be written until sh_src has been read.
+                        * so arrange for sh to be delayed a little
+                        */
+                       set_bit(STRIPE_DELAYED, &sh->state);
+                       set_bit(STRIPE_HANDLE, &sh->state);
+                       if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE,
+                                             &sh_src->state))
+                               atomic_inc(&conf->preread_active_stripes);
+                       release_stripe(sh_src);
+                       goto finish;
+               }
+               if (sh_src)
+                       release_stripe(sh_src);
+
                sh->reconstruct_state = reconstruct_state_idle;
                clear_bit(STRIPE_EXPANDING, &sh->state);
                for (i = conf->raid_disks; i--; ) {
@@ -3539,24 +3319,7 @@ static void handle_stripe6(struct stripe_head *sh)
 
        if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
            !sh->reconstruct_state) {
-               struct stripe_head *sh2
-                       = get_active_stripe(conf, sh->sector, 1, 1, 1);
-               if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
-                       /* sh cannot be written until sh2 has been read.
-                        * so arrange for sh to be delayed a little
-                        */
-                       set_bit(STRIPE_DELAYED, &sh->state);
-                       set_bit(STRIPE_HANDLE, &sh->state);
-                       if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE,
-                                             &sh2->state))
-                               atomic_inc(&conf->preread_active_stripes);
-                       release_stripe(sh2);
-                       goto unlock;
-               }
-               if (sh2)
-                       release_stripe(sh2);
-
-               /* Need to write out all blocks after computing P&Q */
+               /* Need to write out all blocks after computing parity */
                sh->disks = conf->raid_disks;
                stripe_set_idx(sh->sector, conf, 0, sh);
                schedule_reconstruction(sh, &s, 1, 1);
@@ -3569,22 +3332,39 @@ static void handle_stripe6(struct stripe_head *sh)
 
        if (s.expanding && s.locked == 0 &&
            !test_bit(STRIPE_COMPUTE_RUN, &sh->state))
-               handle_stripe_expansion(conf, sh, &r6s);
-
- unlock:
-       spin_unlock(&sh->lock);
+               handle_stripe_expansion(conf, sh);
 
+finish:
        /* wait for this device to become unblocked */
-       if (unlikely(blocked_rdev))
-               md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
+       if (unlikely(s.blocked_rdev))
+               md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev);
+
+       if (s.handle_bad_blocks)
+               for (i = disks; i--; ) {
+                       mdk_rdev_t *rdev;
+                       struct r5dev *dev = &sh->dev[i];
+                       if (test_and_clear_bit(R5_WriteError, &dev->flags)) {
+                               /* We own a safe reference to the rdev */
+                               rdev = conf->disks[i].rdev;
+                               if (!rdev_set_badblocks(rdev, sh->sector,
+                                                       STRIPE_SECTORS, 0))
+                                       md_error(conf->mddev, rdev);
+                               rdev_dec_pending(rdev, conf->mddev);
+                       }
+                       if (test_and_clear_bit(R5_MadeGood, &dev->flags)) {
+                               rdev = conf->disks[i].rdev;
+                               rdev_clear_badblocks(rdev, sh->sector,
+                                                    STRIPE_SECTORS);
+                               rdev_dec_pending(rdev, conf->mddev);
+                       }
+               }
 
        if (s.ops_request)
                raid_run_ops(sh, s.ops_request);
 
        ops_run_io(sh, &s);
 
-
-       if (dec_preread_active) {
+       if (s.dec_preread_active) {
                /* We delay this until after ops_run_io so that if make_request
                 * is waiting on a flush, it won't continue until the writes
                 * have actually been submitted.
@@ -3595,15 +3375,9 @@ static void handle_stripe6(struct stripe_head *sh)
                        md_wakeup_thread(conf->mddev->thread);
        }
 
-       return_io(return_bi);
-}
+       return_io(s.return_bi);
 
-static void handle_stripe(struct stripe_head *sh)
-{
-       if (sh->raid_conf->level == 6)
-               handle_stripe6(sh);
-       else
-               handle_stripe5(sh);
+       clear_bit(STRIPE_ACTIVE, &sh->state);
 }
 
 static void raid5_activate_delayed(raid5_conf_t *conf)
@@ -3833,6 +3607,9 @@ static int chunk_aligned_read(mddev_t *mddev, struct bio * raid_bio)
        rcu_read_lock();
        rdev = rcu_dereference(conf->disks[dd_idx].rdev);
        if (rdev && test_bit(In_sync, &rdev->flags)) {
+               sector_t first_bad;
+               int bad_sectors;
+
                atomic_inc(&rdev->nr_pending);
                rcu_read_unlock();
                raid_bio->bi_next = (void*)rdev;
@@ -3840,8 +3617,10 @@ static int chunk_aligned_read(mddev_t *mddev, struct bio * raid_bio)
                align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
                align_bi->bi_sector += rdev->data_offset;
 
-               if (!bio_fits_rdev(align_bi)) {
-                       /* too big in some way */
+               if (!bio_fits_rdev(align_bi) ||
+                   is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9,
+                               &first_bad, &bad_sectors)) {
+                       /* too big in some way, or has a known bad block */
                        bio_put(align_bi);
                        rdev_dec_pending(rdev, mddev);
                        return 0;
@@ -4016,7 +3795,7 @@ static int make_request(mddev_t *mddev, struct bio * bi)
                                }
                        }
 
-                       if (bio_data_dir(bi) == WRITE &&
+                       if (rw == WRITE &&
                            logical_sector >= mddev->suspend_lo &&
                            logical_sector < mddev->suspend_hi) {
                                release_stripe(sh);
@@ -4034,7 +3813,7 @@ static int make_request(mddev_t *mddev, struct bio * bi)
                        }
 
                        if (test_bit(STRIPE_EXPANDING, &sh->state) ||
-                           !add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) {
+                           !add_stripe_bio(sh, bi, dd_idx, rw)) {
                                /* Stripe is busy expanding or
                                 * add failed due to overlap.  Flush everything
                                 * and wait a while
@@ -4375,10 +4154,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
 
        bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded);
 
-       spin_lock(&sh->lock);
-       set_bit(STRIPE_SYNCING, &sh->state);
-       clear_bit(STRIPE_INSYNC, &sh->state);
-       spin_unlock(&sh->lock);
+       set_bit(STRIPE_SYNC_REQUESTED, &sh->state);
 
        handle_stripe(sh);
        release_stripe(sh);
@@ -4509,6 +4285,9 @@ static void raid5d(mddev_t *mddev)
                release_stripe(sh);
                cond_resched();
 
+               if (mddev->flags & ~(1<<MD_CHANGE_PENDING))
+                       md_check_recovery(mddev);
+
                spin_lock_irq(&conf->device_lock);
        }
        pr_debug("%d stripes handled\n", handled);
@@ -5313,6 +5092,7 @@ static int raid5_remove_disk(mddev_t *mddev, int number)
                 * isn't possible.
                 */
                if (!test_bit(Faulty, &rdev->flags) &&
+                   mddev->recovery_disabled != conf->recovery_disabled &&
                    !has_failed(conf) &&
                    number < conf->raid_disks) {
                        err = -EBUSY;
@@ -5341,6 +5121,9 @@ static int raid5_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
        int first = 0;
        int last = conf->raid_disks - 1;
 
+       if (mddev->recovery_disabled == conf->recovery_disabled)
+               return -EBUSY;
+
        if (has_failed(conf))
                /* no point adding a device */
                return -EINVAL;
@@ -5519,16 +5302,14 @@ static int raid5_start_reshape(mddev_t *mddev)
                        if (rdev->raid_disk < 0 &&
                            !test_bit(Faulty, &rdev->flags)) {
                                if (raid5_add_disk(mddev, rdev) == 0) {
-                                       char nm[20];
                                        if (rdev->raid_disk
                                            >= conf->previous_raid_disks) {
                                                set_bit(In_sync, &rdev->flags);
                                                added_devices++;
                                        } else
                                                rdev->recovery_offset = 0;
-                                       sprintf(nm, "rd%d", rdev->raid_disk);
-                                       if (sysfs_create_link(&mddev->kobj,
-                                                             &rdev->kobj, nm))
+
+                                       if (sysfs_link_rdev(mddev, rdev))
                                                /* Failure here is OK */;
                                }
                        } else if (rdev->raid_disk >= conf->previous_raid_disks
@@ -5624,9 +5405,7 @@ static void raid5_finish_reshape(mddev_t *mddev)
                             d++) {
                                mdk_rdev_t *rdev = conf->disks[d].rdev;
                                if (rdev && raid5_remove_disk(mddev, d) == 0) {
-                                       char nm[20];
-                                       sprintf(nm, "rd%d", rdev->raid_disk);
-                                       sysfs_remove_link(&mddev->kobj, nm);
+                                       sysfs_unlink_rdev(mddev, rdev);
                                        rdev->raid_disk = -1;
                                }
                        }
index 3ca77a2..11b9566 100644 (file)
@@ -6,11 +6,11 @@
 
 /*
  *
- * Each stripe contains one buffer per disc.  Each buffer can be in
+ * Each stripe contains one buffer per device.  Each buffer can be in
  * one of a number of states stored in "flags".  Changes between
- * these states happen *almost* exclusively under a per-stripe
- * spinlock.  Some very specific changes can happen in bi_end_io, and
- * these are not protected by the spin lock.
+ * these states happen *almost* exclusively under the protection of the
+ * STRIPE_ACTIVE flag.  Some very specific changes can happen in bi_end_io, and
+ * these are not protected by STRIPE_ACTIVE.
  *
  * The flag bits that are used to represent these states are:
  *   R5_UPTODATE and R5_LOCKED
  * block and the cached buffer are successfully written, any buffer on
  * a written list can be returned with b_end_io.
  *
- * The write list and read list both act as fifos.  The read list is
- * protected by the device_lock.  The write and written lists are
- * protected by the stripe lock.  The device_lock, which can be
- * claimed while the stipe lock is held, is only for list
- * manipulations and will only be held for a very short time.  It can
- * be claimed from interrupts.
+ * The write list and read list both act as fifos.  The read list,
+ * write list and written list are protected by the device_lock.
+ * The device_lock is only for list manipulations and will only be
+ * held for a very short time.  It can be claimed from interrupts.
  *
  *
  * Stripes in the stripe cache can be on one of two lists (or on
@@ -96,7 +94,6 @@
  *
  * The inactive_list, handle_list and hash bucket lists are all protected by the
  * device_lock.
- *  - stripes on the inactive_list never have their stripe_lock held.
  *  - stripes have a reference counter. If count==0, they are on a list.
  *  - If a stripe might need handling, STRIPE_HANDLE is set.
  *  - When refcount reaches zero, then if STRIPE_HANDLE it is put on
  *  attach a request to an active stripe (add_stripe_bh())
  *     lockdev attach-buffer unlockdev
  *  handle a stripe (handle_stripe())
- *     lockstripe clrSTRIPE_HANDLE ...
+ *     setSTRIPE_ACTIVE,  clrSTRIPE_HANDLE ...
  *             (lockdev check-buffers unlockdev) ..
  *             change-state ..
- *             record io/ops needed unlockstripe schedule io/ops
+ *             record io/ops needed clearSTRIPE_ACTIVE schedule io/ops
  *  release an active stripe (release_stripe())
  *     lockdev if (!--cnt) { if  STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev
  *
  * on a cached buffer, and plus one if the stripe is undergoing stripe
  * operations.
  *
- * Stripe operations are performed outside the stripe lock,
- * the stripe operations are:
+ * The stripe operations are:
  * -copying data between the stripe cache and user application buffers
  * -computing blocks to save a disk access, or to recover a missing block
  * -updating the parity on a write operation (reconstruct write and
  */
 
 /*
- * Operations state - intermediate states that are visible outside of sh->lock
+ * Operations state - intermediate states that are visible outside of 
+ *   STRIPE_ACTIVE.
  * In general _idle indicates nothing is running, _run indicates a data
  * processing operation is active, and _result means the data processing result
  * is stable and can be acted upon.  For simple operations like biofill and
@@ -209,7 +206,6 @@ struct stripe_head {
        short                   ddf_layout;/* use DDF ordering to calculate Q */
        unsigned long           state;          /* state flags */
        atomic_t                count;        /* nr of active thread/requests */
-       spinlock_t              lock;
        int                     bm_seq; /* sequence number for bitmap flushes */
        int                     disks;          /* disks in stripe */
        enum check_states       check_state;
@@ -240,19 +236,20 @@ struct stripe_head {
 };
 
 /* stripe_head_state - collects and tracks the dynamic state of a stripe_head
- *     for handle_stripe.  It is only valid under spin_lock(sh->lock);
+ *     for handle_stripe.
  */
 struct stripe_head_state {
        int syncing, expanding, expanded;
        int locked, uptodate, to_read, to_write, failed, written;
        int to_fill, compute, req_compute, non_overwrite;
-       int failed_num;
+       int failed_num[2];
+       int p_failed, q_failed;
+       int dec_preread_active;
        unsigned long ops_request;
-};
 
-/* r6_state - extra state data only relevant to r6 */
-struct r6_state {
-       int p_failed, q_failed, failed_num[2];
+       struct bio *return_bi;
+       mdk_rdev_t *blocked_rdev;
+       int handle_bad_blocks;
 };
 
 /* Flags */
@@ -268,14 +265,16 @@ struct r6_state {
 #define        R5_ReWrite      9       /* have tried to over-write the readerror */
 
 #define        R5_Expanded     10      /* This block now has post-expand data */
-#define        R5_Wantcompute  11 /* compute_block in progress treat as
-                                   * uptodate
-                                   */
-#define        R5_Wantfill     12 /* dev->toread contains a bio that needs
-                                   * filling
-                                   */
-#define R5_Wantdrain   13 /* dev->towrite needs to be drained */
-#define R5_WantFUA     14      /* Write should be FUA */
+#define        R5_Wantcompute  11      /* compute_block in progress treat as
+                                * uptodate
+                                */
+#define        R5_Wantfill     12      /* dev->toread contains a bio that needs
+                                * filling
+                                */
+#define        R5_Wantdrain    13      /* dev->towrite needs to be drained */
+#define        R5_WantFUA      14      /* Write should be FUA */
+#define        R5_WriteError   15      /* got a write error - need to record it */
+#define        R5_MadeGood     16      /* A bad block has been fixed by writing to it*/
 /*
  * Write method
  */
@@ -289,21 +288,25 @@ struct r6_state {
 /*
  * Stripe state
  */
-#define STRIPE_HANDLE          2
-#define        STRIPE_SYNCING          3
-#define        STRIPE_INSYNC           4
-#define        STRIPE_PREREAD_ACTIVE   5
-#define        STRIPE_DELAYED          6
-#define        STRIPE_DEGRADED         7
-#define        STRIPE_BIT_DELAY        8
-#define        STRIPE_EXPANDING        9
-#define        STRIPE_EXPAND_SOURCE    10
-#define        STRIPE_EXPAND_READY     11
-#define        STRIPE_IO_STARTED       12 /* do not count towards 'bypass_count' */
-#define        STRIPE_FULL_WRITE       13 /* all blocks are set to be overwritten */
-#define        STRIPE_BIOFILL_RUN      14
-#define        STRIPE_COMPUTE_RUN      15
-#define        STRIPE_OPS_REQ_PENDING  16
+enum {
+       STRIPE_ACTIVE,
+       STRIPE_HANDLE,
+       STRIPE_SYNC_REQUESTED,
+       STRIPE_SYNCING,
+       STRIPE_INSYNC,
+       STRIPE_PREREAD_ACTIVE,
+       STRIPE_DELAYED,
+       STRIPE_DEGRADED,
+       STRIPE_BIT_DELAY,
+       STRIPE_EXPANDING,
+       STRIPE_EXPAND_SOURCE,
+       STRIPE_EXPAND_READY,
+       STRIPE_IO_STARTED,      /* do not count towards 'bypass_count' */
+       STRIPE_FULL_WRITE,      /* all blocks are set to be overwritten */
+       STRIPE_BIOFILL_RUN,
+       STRIPE_COMPUTE_RUN,
+       STRIPE_OPS_REQ_PENDING,
+};
 
 /*
  * Operation request flags
@@ -336,7 +339,7 @@ struct r6_state {
  * PREREAD_ACTIVE.
  * In stripe_handle, if we find pre-reading is necessary, we do it if
  * PREREAD_ACTIVE is set, else we set DELAYED which will send it to the delayed queue.
- * HANDLE gets cleared if stripe_handle leave nothing locked.
+ * HANDLE gets cleared if stripe_handle leaves nothing locked.
  */
 
 
@@ -399,7 +402,7 @@ struct raid5_private_data {
                                            * (fresh device added).
                                            * Cleared when a sync completes.
                                            */
-
+       int                     recovery_disabled;
        /* per cpu variables */
        struct raid5_percpu {
                struct page     *spare_page; /* Used when checking P/Q in raid6 */
index b7622c3..e1eca2a 100644 (file)
@@ -282,6 +282,7 @@ obj-$(CONFIG_USB_HSO)               += usb/
 obj-$(CONFIG_USB_USBNET)        += usb/
 obj-$(CONFIG_USB_ZD1201)        += usb/
 obj-$(CONFIG_USB_IPHETH)        += usb/
+obj-$(CONFIG_USB_CDC_PHONET)   += usb/
 
 obj-$(CONFIG_WLAN) += wireless/
 obj-$(CONFIG_NET_TULIP) += tulip/
index 536038b..31798f5 100644 (file)
@@ -1502,13 +1502,13 @@ static int __devinit ace_init(struct net_device *dev)
         * firmware to wipe the ring without re-initializing it.
         */
        if (!test_and_set_bit(0, &ap->std_refill_busy))
-               ace_load_std_rx_ring(ap, RX_RING_SIZE);
+               ace_load_std_rx_ring(dev, RX_RING_SIZE);
        else
                printk(KERN_ERR "%s: Someone is busy refilling the RX ring\n",
                       ap->name);
        if (ap->version >= 2) {
                if (!test_and_set_bit(0, &ap->mini_refill_busy))
-                       ace_load_mini_rx_ring(ap, RX_MINI_SIZE);
+                       ace_load_mini_rx_ring(dev, RX_MINI_SIZE);
                else
                        printk(KERN_ERR "%s: Someone is busy refilling "
                               "the RX mini ring\n", ap->name);
@@ -1584,9 +1584,10 @@ static void ace_watchdog(struct net_device *data)
 }
 
 
-static void ace_tasklet(unsigned long dev)
+static void ace_tasklet(unsigned long arg)
 {
-       struct ace_private *ap = netdev_priv((struct net_device *)dev);
+       struct net_device *dev = (struct net_device *) arg;
+       struct ace_private *ap = netdev_priv(dev);
        int cur_size;
 
        cur_size = atomic_read(&ap->cur_rx_bufs);
@@ -1595,7 +1596,7 @@ static void ace_tasklet(unsigned long dev)
 #ifdef DEBUG
                printk("refilling buffers (current %i)\n", cur_size);
 #endif
-               ace_load_std_rx_ring(ap, RX_RING_SIZE - cur_size);
+               ace_load_std_rx_ring(dev, RX_RING_SIZE - cur_size);
        }
 
        if (ap->version >= 2) {
@@ -1606,7 +1607,7 @@ static void ace_tasklet(unsigned long dev)
                        printk("refilling mini buffers (current %i)\n",
                               cur_size);
 #endif
-                       ace_load_mini_rx_ring(ap, RX_MINI_SIZE - cur_size);
+                       ace_load_mini_rx_ring(dev, RX_MINI_SIZE - cur_size);
                }
        }
 
@@ -1616,7 +1617,7 @@ static void ace_tasklet(unsigned long dev)
 #ifdef DEBUG
                printk("refilling jumbo buffers (current %i)\n", cur_size);
 #endif
-               ace_load_jumbo_rx_ring(ap, RX_JUMBO_SIZE - cur_size);
+               ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE - cur_size);
        }
        ap->tasklet_pending = 0;
 }
@@ -1642,8 +1643,9 @@ static void ace_dump_trace(struct ace_private *ap)
  * done only before the device is enabled, thus no interrupts are
  * generated and by the interrupt handler/tasklet handler.
  */
-static void ace_load_std_rx_ring(struct ace_private *ap, int nr_bufs)
+static void ace_load_std_rx_ring(struct net_device *dev, int nr_bufs)
 {
+       struct ace_private *ap = netdev_priv(dev);
        struct ace_regs __iomem *regs = ap->regs;
        short i, idx;
 
@@ -1657,11 +1659,10 @@ static void ace_load_std_rx_ring(struct ace_private *ap, int nr_bufs)
                struct rx_desc *rd;
                dma_addr_t mapping;
 
-               skb = dev_alloc_skb(ACE_STD_BUFSIZE + NET_IP_ALIGN);
+               skb = netdev_alloc_skb_ip_align(dev, ACE_STD_BUFSIZE);
                if (!skb)
                        break;
 
-               skb_reserve(skb, NET_IP_ALIGN);
                mapping = pci_map_page(ap->pdev, virt_to_page(skb->data),
                                       offset_in_page(skb->data),
                                       ACE_STD_BUFSIZE,
@@ -1705,8 +1706,9 @@ static void ace_load_std_rx_ring(struct ace_private *ap, int nr_bufs)
 }
 
 
-static void ace_load_mini_rx_ring(struct ace_private *ap, int nr_bufs)
+static void ace_load_mini_rx_ring(struct net_device *dev, int nr_bufs)
 {
+       struct ace_private *ap = netdev_priv(dev);
        struct ace_regs __iomem *regs = ap->regs;
        short i, idx;
 
@@ -1718,11 +1720,10 @@ static void ace_load_mini_rx_ring(struct ace_private *ap, int nr_bufs)
                struct rx_desc *rd;
                dma_addr_t mapping;
 
-               skb = dev_alloc_skb(ACE_MINI_BUFSIZE + NET_IP_ALIGN);
+               skb = netdev_alloc_skb_ip_align(dev, ACE_MINI_BUFSIZE);
                if (!skb)
                        break;
 
-               skb_reserve(skb, NET_IP_ALIGN);
                mapping = pci_map_page(ap->pdev, virt_to_page(skb->data),
                                       offset_in_page(skb->data),
                                       ACE_MINI_BUFSIZE,
@@ -1762,8 +1763,9 @@ static void ace_load_mini_rx_ring(struct ace_private *ap, int nr_bufs)
  * Load the jumbo rx ring, this may happen at any time if the MTU
  * is changed to a value > 1500.
  */
-static void ace_load_jumbo_rx_ring(struct ace_private *ap, int nr_bufs)
+static void ace_load_jumbo_rx_ring(struct net_device *dev, int nr_bufs)
 {
+       struct ace_private *ap = netdev_priv(dev);
        struct ace_regs __iomem *regs = ap->regs;
        short i, idx;
 
@@ -1774,11 +1776,10 @@ static void ace_load_jumbo_rx_ring(struct ace_private *ap, int nr_bufs)
                struct rx_desc *rd;
                dma_addr_t mapping;
 
-               skb = dev_alloc_skb(ACE_JUMBO_BUFSIZE + NET_IP_ALIGN);
+               skb = netdev_alloc_skb_ip_align(dev, ACE_JUMBO_BUFSIZE);
                if (!skb)
                        break;
 
-               skb_reserve(skb, NET_IP_ALIGN);
                mapping = pci_map_page(ap->pdev, virt_to_page(skb->data),
                                       offset_in_page(skb->data),
                                       ACE_JUMBO_BUFSIZE,
@@ -2196,7 +2197,7 @@ static irqreturn_t ace_interrupt(int irq, void *dev_id)
 #ifdef DEBUG
                                printk("low on std buffers %i\n", cur_size);
 #endif
-                               ace_load_std_rx_ring(ap,
+                               ace_load_std_rx_ring(dev,
                                                     RX_RING_SIZE - cur_size);
                        } else
                                run_tasklet = 1;
@@ -2212,7 +2213,8 @@ static irqreturn_t ace_interrupt(int irq, void *dev_id)
                                        printk("low on mini buffers %i\n",
                                               cur_size);
 #endif
-                                       ace_load_mini_rx_ring(ap, RX_MINI_SIZE - cur_size);
+                                       ace_load_mini_rx_ring(dev,
+                                                             RX_MINI_SIZE - cur_size);
                                } else
                                        run_tasklet = 1;
                        }
@@ -2228,7 +2230,8 @@ static irqreturn_t ace_interrupt(int irq, void *dev_id)
                                        printk("low on jumbo buffers %i\n",
                                               cur_size);
 #endif
-                                       ace_load_jumbo_rx_ring(ap, RX_JUMBO_SIZE - cur_size);
+                                       ace_load_jumbo_rx_ring(dev,
+                                                              RX_JUMBO_SIZE - cur_size);
                                } else
                                        run_tasklet = 1;
                        }
@@ -2267,7 +2270,7 @@ static int ace_open(struct net_device *dev)
 
        if (ap->jumbo &&
            !test_and_set_bit(0, &ap->jumbo_refill_busy))
-               ace_load_jumbo_rx_ring(ap, RX_JUMBO_SIZE);
+               ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE);
 
        if (dev->flags & IFF_PROMISC) {
                cmd.evt = C_SET_PROMISC_MODE;
@@ -2575,7 +2578,7 @@ static int ace_change_mtu(struct net_device *dev, int new_mtu)
                               "support\n", dev->name);
                        ap->jumbo = 1;
                        if (!test_and_set_bit(0, &ap->jumbo_refill_busy))
-                               ace_load_jumbo_rx_ring(ap, RX_JUMBO_SIZE);
+                               ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE);
                        ace_set_rxtx_parms(dev, 1);
                }
        } else {
index f67dc9b..51c486c 100644 (file)
@@ -766,9 +766,9 @@ static inline void ace_unmask_irq(struct net_device *dev)
  * Prototypes
  */
 static int ace_init(struct net_device *dev);
-static void ace_load_std_rx_ring(struct ace_private *ap, int nr_bufs);
-static void ace_load_mini_rx_ring(struct ace_private *ap, int nr_bufs);
-static void ace_load_jumbo_rx_ring(struct ace_private *ap, int nr_bufs);
+static void ace_load_std_rx_ring(struct net_device *dev, int nr_bufs);
+static void ace_load_mini_rx_ring(struct net_device *dev, int nr_bufs);
+static void ace_load_jumbo_rx_ring(struct net_device *dev, int nr_bufs);
 static irqreturn_t ace_interrupt(int irq, void *dev_id);
 static int ace_load_firmware(struct net_device *dev);
 static int ace_open(struct net_device *dev);
index 02842d0..38a83ac 100644 (file)
@@ -1557,8 +1557,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 
                        if (slave_dev->type != ARPHRD_ETHER)
                                bond_setup_by_slave(bond_dev, slave_dev);
-                       else
+                       else {
                                ether_setup(bond_dev);
+                               bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+                       }
 
                        netdev_bonding_change(bond_dev,
                                              NETDEV_POST_TYPE_CHANGE);
@@ -4330,7 +4332,7 @@ static void bond_setup(struct net_device *bond_dev)
        bond_dev->tx_queue_len = 0;
        bond_dev->flags |= IFF_MASTER|IFF_MULTICAST;
        bond_dev->priv_flags |= IFF_BONDING;
-       bond_dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+       bond_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
 
        /* At first, we block adding VLANs. That's the only way to
         * prevent problems that occur when adding VLANs over an
@@ -4691,7 +4693,7 @@ static int bond_check_params(struct bond_params *params)
                /* miimon and arp_interval not set, we need one so things
                 * work as expected, see bonding.txt for details
                 */
-               pr_warning("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details.\n");
+               pr_debug("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details.\n");
        }
 
        if (primary && !USES_PRIMARY(bond_mode)) {
index b60835f..2dfb4bf 100644 (file)
@@ -1025,6 +1025,7 @@ static ssize_t bonding_store_primary(struct device *d,
        int i;
        struct slave *slave;
        struct bonding *bond = to_bond(d);
+       char ifname[IFNAMSIZ];
 
        if (!rtnl_trylock())
                return restart_syscall();
@@ -1035,32 +1036,33 @@ static ssize_t bonding_store_primary(struct device *d,
        if (!USES_PRIMARY(bond->params.mode)) {
                pr_info("%s: Unable to set primary slave; %s is in mode %d\n",
                        bond->dev->name, bond->dev->name, bond->params.mode);
-       } else {
-               bond_for_each_slave(bond, slave, i) {
-                       if (strnicmp
-                           (slave->dev->name, buf,
-                            strlen(slave->dev->name)) == 0) {
-                               pr_info("%s: Setting %s as primary slave.\n",
-                                       bond->dev->name, slave->dev->name);
-                               bond->primary_slave = slave;
-                               strcpy(bond->params.primary, slave->dev->name);
-                               bond_select_active_slave(bond);
-                               goto out;
-                       }
-               }
+               goto out;
+       }
 
-               /* if we got here, then we didn't match the name of any slave */
+       sscanf(buf, "%16s", ifname); /* IFNAMSIZ */
 
-               if (strlen(buf) == 0 || buf[0] == '\n') {
-                       pr_info("%s: Setting primary slave to None.\n",
-                               bond->dev->name);
-                       bond->primary_slave = NULL;
-                               bond_select_active_slave(bond);
-               } else {
-                       pr_info("%s: Unable to set %.*s as primary slave as it is not a slave.\n",
-                               bond->dev->name, (int)strlen(buf) - 1, buf);
+       /* check to see if we are clearing primary */
+       if (!strlen(ifname) || buf[0] == '\n') {
+               pr_info("%s: Setting primary slave to None.\n",
+                       bond->dev->name);
+               bond->primary_slave = NULL;
+               bond_select_active_slave(bond);
+               goto out;
+       }
+
+       bond_for_each_slave(bond, slave, i) {
+               if (strncmp(slave->dev->name, ifname, IFNAMSIZ) == 0) {
+                       pr_info("%s: Setting %s as primary slave.\n",
+                               bond->dev->name, slave->dev->name);
+                       bond->primary_slave = slave;
+                       strcpy(bond->params.primary, slave->dev->name);
+                       bond_select_active_slave(bond);
+                       goto out;
                }
        }
+
+       pr_info("%s: Unable to set %.*s as primary slave.\n",
+               bond->dev->name, (int)strlen(buf) - 1, buf);
 out:
        write_unlock_bh(&bond->curr_slave_lock);
        read_unlock(&bond->lock);
@@ -1195,6 +1197,7 @@ static ssize_t bonding_store_active_slave(struct device *d,
        struct slave *old_active = NULL;
        struct slave *new_active = NULL;
        struct bonding *bond = to_bond(d);
+       char ifname[IFNAMSIZ];
 
        if (!rtnl_trylock())
                return restart_syscall();
@@ -1203,56 +1206,62 @@ static ssize_t bonding_store_active_slave(struct device *d,
        read_lock(&bond->lock);
        write_lock_bh(&bond->curr_slave_lock);
 
-       if (!USES_PRIMARY(bond->params.mode))
+       if (!USES_PRIMARY(bond->params.mode)) {
                pr_info("%s: Unable to change active slave; %s is in mode %d\n",
                        bond->dev->name, bond->dev->name, bond->params.mode);
-       else {
-               bond_for_each_slave(bond, slave, i) {
-                       if (strnicmp
-                           (slave->dev->name, buf,
-                            strlen(slave->dev->name)) == 0) {
-                               old_active = bond->curr_active_slave;
-                               new_active = slave;
-                               if (new_active == old_active) {
-                                       /* do nothing */
-                                       pr_info("%s: %s is already the current active slave.\n",
+               goto out;
+       }
+
+       sscanf(buf, "%16s", ifname); /* IFNAMSIZ */
+
+       /* check to see if we are clearing active */
+       if (!strlen(ifname) || buf[0] == '\n') {
+               pr_info("%s: Clearing current active slave.\n",
+                       bond->dev->name);
+               bond->curr_active_slave = NULL;
+               bond_select_active_slave(bond);
+               goto out;
+       }
+
+       bond_for_each_slave(bond, slave, i) {
+               if (strncmp(slave->dev->name, ifname, IFNAMSIZ) == 0) {
+                       old_active = bond->curr_active_slave;
+                       new_active = slave;
+                       if (new_active == old_active) {
+                               /* do nothing */
+                               pr_info("%s: %s is already the current"
+                                       " active slave.\n",
+                                       bond->dev->name,
+                                       slave->dev->name);
+                               goto out;
+                       }
+                       else {
+                               if ((new_active) &&
+                                   (old_active) &&
+                                   (new_active->link == BOND_LINK_UP) &&
+                                   IS_UP(new_active->dev)) {
+                                       pr_info("%s: Setting %s as active"
+                                               " slave.\n",
                                                bond->dev->name,
                                                slave->dev->name);
-                                       goto out;
+                                       bond_change_active_slave(bond,
+                                                                new_active);
                                }
                                else {
-                                       if ((new_active) &&
-                                           (old_active) &&
-                                           (new_active->link == BOND_LINK_UP) &&
-                                           IS_UP(new_active->dev)) {
-                                               pr_info("%s: Setting %s as active slave.\n",
-                                                       bond->dev->name,
-                                                       slave->dev->name);
-                                                       bond_change_active_slave(bond, new_active);
-                                       }
-                                       else {
-                                               pr_info("%s: Could not set %s as active slave; either %s is down or the link is down.\n",
-                                                       bond->dev->name,
-                                                       slave->dev->name,
-                                                       slave->dev->name);
-                                       }
-                                       goto out;
+                                       pr_info("%s: Could not set %s as"
+                                               " active slave; either %s is"
+                                               " down or the link is down.\n",
+                                               bond->dev->name,
+                                               slave->dev->name,
+                                               slave->dev->name);
                                }
+                               goto out;
                        }
                }
-
-               /* if we got here, then we didn't match the name of any slave */
-
-               if (strlen(buf) == 0 || buf[0] == '\n') {
-                       pr_info("%s: Setting active slave to None.\n",
-                               bond->dev->name);
-                       bond->primary_slave = NULL;
-                       bond_select_active_slave(bond);
-               } else {
-                       pr_info("%s: Unable to set %.*s as active slave as it is not a slave.\n",
-                               bond->dev->name, (int)strlen(buf) - 1, buf);
-               }
        }
+
+       pr_info("%s: Unable to set %.*s as active slave.\n",
+               bond->dev->name, (int)strlen(buf) - 1, buf);
  out:
        write_unlock_bh(&bond->curr_slave_lock);
        read_unlock(&bond->lock);
index e64cd9c..e55df30 100644 (file)
@@ -2764,7 +2764,14 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit)
                        prefetch(skb->data);
 
                        vlanflags = le32_to_cpu(np->get_rx.ex->buflow);
-                       if (vlanflags & NV_RX3_VLAN_TAG_PRESENT) {
+
+                       /*
+                        * There's a need to check for NETIF_F_HW_VLAN_RX here.
+                        * Even if vlan rx accel is disabled,
+                        * NV_RX3_VLAN_TAG_PRESENT is pseudo randomly set.
+                        */
+                       if (dev->features & NETIF_F_HW_VLAN_RX &&
+                           vlanflags & NV_RX3_VLAN_TAG_PRESENT) {
                                u16 vid = vlanflags & NV_RX3_VLAN_TAG_MASK;
 
                                __vlan_hwaccel_put_tag(skb, vid);
@@ -5331,15 +5338,16 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
                np->txrxctl_bits |= NVREG_TXRXCTL_RXCHECK;
                dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_SG |
                        NETIF_F_TSO | NETIF_F_RXCSUM;
-               dev->features |= dev->hw_features;
        }
 
        np->vlanctl_bits = 0;
        if (id->driver_data & DEV_HAS_VLAN) {
                np->vlanctl_bits = NVREG_VLANCONTROL_ENABLE;
-               dev->features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX;
+               dev->hw_features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX;
        }
 
+       dev->features |= dev->hw_features;
+
        np->pause_flags = NV_PAUSEFRAME_RX_CAPABLE | NV_PAUSEFRAME_RX_REQ | NV_PAUSEFRAME_AUTONEG;
        if ((id->driver_data & DEV_HAS_PAUSEFRAME_TX_V1) ||
            (id->driver_data & DEV_HAS_PAUSEFRAME_TX_V2) ||
@@ -5607,6 +5615,8 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
                goto out_error;
        }
 
+       nv_vlan_mode(dev, dev->features);
+
        netif_carrier_off(dev);
 
        dev_info(&pci_dev->dev, "ifname %s, PHY OUI 0x%x @ %d, addr %pM\n",
index 835cd25..2659daa 100644 (file)
@@ -388,12 +388,8 @@ static void gfar_init_mac(struct net_device *ndev)
        if (priv->hwts_rx_en)
                rctrl |= RCTRL_PRSDEP_INIT | RCTRL_TS_ENABLE;
 
-       /* keep vlan related bits if it's enabled */
-       if (ndev->features & NETIF_F_HW_VLAN_TX)
-               rctrl |= RCTRL_VLEX | RCTRL_PRSDEP_INIT;
-
        if (ndev->features & NETIF_F_HW_VLAN_RX)
-               tctrl |= TCTRL_VLINS;
+               rctrl |= RCTRL_VLEX | RCTRL_PRSDEP_INIT;
 
        /* Init rctrl based on our settings */
        gfar_write(&regs->rctrl, rctrl);
index 6e82dd3..46b5f5f 100644 (file)
@@ -183,7 +183,7 @@ static void ifb_setup(struct net_device *dev)
 
        dev->flags |= IFF_NOARP;
        dev->flags &= ~IFF_MULTICAST;
-       dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+       dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
        random_ether_addr(dev->dev_addr);
 }
 
index ba631fc..05172c3 100644 (file)
@@ -572,7 +572,7 @@ void macvlan_common_setup(struct net_device *dev)
 {
        ether_setup(dev);
 
-       dev->priv_flags        &= ~IFF_XMIT_DST_RELEASE;
+       dev->priv_flags        &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
        dev->netdev_ops         = &macvlan_netdev_ops;
        dev->destructor         = free_netdev;
        dev->header_ops         = &macvlan_hard_header_ops,
index 8035765..dc3fbf6 100644 (file)
@@ -190,6 +190,7 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, unsigned long *bits)
 
 /* minimum number of free TX descriptors required to wake up TX process */
 #define TG3_TX_WAKEUP_THRESH(tnapi)            ((tnapi)->tx_pending / 4)
+#define TG3_TX_BD_DMA_MAX              4096
 
 #define TG3_RAW_IP_ALIGN 2
 
@@ -4824,7 +4825,7 @@ static void tg3_tx(struct tg3_napi *tnapi)
        txq = netdev_get_tx_queue(tp->dev, index);
 
        while (sw_idx != hw_idx) {
-               struct ring_info *ri = &tnapi->tx_buffers[sw_idx];
+               struct tg3_tx_ring_info *ri = &tnapi->tx_buffers[sw_idx];
                struct sk_buff *skb = ri->skb;
                int i, tx_bug = 0;
 
@@ -4840,6 +4841,12 @@ static void tg3_tx(struct tg3_napi *tnapi)
 
                ri->skb = NULL;
 
+               while (ri->fragmented) {
+                       ri->fragmented = false;
+                       sw_idx = NEXT_TX(sw_idx);
+                       ri = &tnapi->tx_buffers[sw_idx];
+               }
+
                sw_idx = NEXT_TX(sw_idx);
 
                for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
@@ -4851,6 +4858,13 @@ static void tg3_tx(struct tg3_napi *tnapi)
                                       dma_unmap_addr(ri, mapping),
                                       skb_shinfo(skb)->frags[i].size,
                                       PCI_DMA_TODEVICE);
+
+                       while (ri->fragmented) {
+                               ri->fragmented = false;
+                               sw_idx = NEXT_TX(sw_idx);
+                               ri = &tnapi->tx_buffers[sw_idx];
+                       }
+
                        sw_idx = NEXT_TX(sw_idx);
                }
 
@@ -5901,40 +5915,100 @@ static inline int tg3_40bit_overflow_test(struct tg3 *tp, dma_addr_t mapping,
 #endif
 }
 
-static void tg3_set_txd(struct tg3_napi *tnapi, int entry,
-                       dma_addr_t mapping, int len, u32 flags,
-                       u32 mss_and_is_end)
+static inline void tg3_tx_set_bd(struct tg3_tx_buffer_desc *txbd,
+                                dma_addr_t mapping, u32 len, u32 flags,
+                                u32 mss, u32 vlan)
+{
+       txbd->addr_hi = ((u64) mapping >> 32);
+       txbd->addr_lo = ((u64) mapping & 0xffffffff);
+       txbd->len_flags = (len << TXD_LEN_SHIFT) | (flags & 0x0000ffff);
+       txbd->vlan_tag = (mss << TXD_MSS_SHIFT) | (vlan << TXD_VLAN_TAG_SHIFT);
+}
+
+static bool tg3_tx_frag_set(struct tg3_napi *tnapi, u32 *entry, u32 *budget,
+                           dma_addr_t map, u32 len, u32 flags,
+                           u32 mss, u32 vlan)
 {
-       struct tg3_tx_buffer_desc *txd = &tnapi->tx_ring[entry];
-       int is_end = (mss_and_is_end & 0x1);
-       u32 mss = (mss_and_is_end >> 1);
-       u32 vlan_tag = 0;
+       struct tg3 *tp = tnapi->tp;
+       bool hwbug = false;
+
+       if (tg3_flag(tp, SHORT_DMA_BUG) && len <= 8)
+               hwbug = 1;
+
+       if (tg3_4g_overflow_test(map, len))
+               hwbug = 1;
+
+       if (tg3_40bit_overflow_test(tp, map, len))
+               hwbug = 1;
+
+       if (tg3_flag(tp, 4K_FIFO_LIMIT)) {
+               u32 tmp_flag = flags & ~TXD_FLAG_END;
+               while (len > TG3_TX_BD_DMA_MAX) {
+                       u32 frag_len = TG3_TX_BD_DMA_MAX;
+                       len -= TG3_TX_BD_DMA_MAX;
+
+                       if (len) {
+                               tnapi->tx_buffers[*entry].fragmented = true;
+                               /* Avoid the 8byte DMA problem */
+                               if (len <= 8) {
+                                       len += TG3_TX_BD_DMA_MAX / 2;
+                                       frag_len = TG3_TX_BD_DMA_MAX / 2;
+                               }
+                       } else
+                               tmp_flag = flags;
+
+                       if (*budget) {
+                               tg3_tx_set_bd(&tnapi->tx_ring[*entry], map,
+                                             frag_len, tmp_flag, mss, vlan);
+                               (*budget)--;
+                               *entry = NEXT_TX(*entry);
+                       } else {
+                               hwbug = 1;
+                               break;
+                       }
+
+                       map += frag_len;
+               }
 
-       if (is_end)
-               flags |= TXD_FLAG_END;
-       if (flags & TXD_FLAG_VLAN) {
-               vlan_tag = flags >> 16;
-               flags &= 0xffff;
+               if (len) {
+                       if (*budget) {
+                               tg3_tx_set_bd(&tnapi->tx_ring[*entry], map,
+                                             len, flags, mss, vlan);
+                               (*budget)--;
+                               *entry = NEXT_TX(*entry);
+                       } else {
+                               hwbug = 1;
+                       }
+               }
+       } else {
+               tg3_tx_set_bd(&tnapi->tx_ring[*entry], map,
+                             len, flags, mss, vlan);
+               *entry = NEXT_TX(*entry);
        }
-       vlan_tag |= (mss << TXD_MSS_SHIFT);
 
-       txd->addr_hi = ((u64) mapping >> 32);
-       txd->addr_lo = ((u64) mapping & 0xffffffff);
-       txd->len_flags = (len << TXD_LEN_SHIFT) | flags;
-       txd->vlan_tag = vlan_tag << TXD_VLAN_TAG_SHIFT;
+       return hwbug;
 }
 
-static void tg3_skb_error_unmap(struct tg3_napi *tnapi,
-                               struct sk_buff *skb, int last)
+static void tg3_tx_skb_unmap(struct tg3_napi *tnapi, u32 entry, int last)
 {
        int i;
-       u32 entry = tnapi->tx_prod;
-       struct ring_info *txb = &tnapi->tx_buffers[entry];
+       struct sk_buff *skb;
+       struct tg3_tx_ring_info *txb = &tnapi->tx_buffers[entry];
+
+       skb = txb->skb;
+       txb->skb = NULL;
 
        pci_unmap_single(tnapi->tp->pdev,
                         dma_unmap_addr(txb, mapping),
                         skb_headlen(skb),
                         PCI_DMA_TODEVICE);
+
+       while (txb->fragmented) {
+               txb->fragmented = false;
+               entry = NEXT_TX(entry);
+               txb = &tnapi->tx_buffers[entry];
+       }
+
        for (i = 0; i < last; i++) {
                skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
@@ -5944,18 +6018,24 @@ static void tg3_skb_error_unmap(struct tg3_napi *tnapi,
                pci_unmap_page(tnapi->tp->pdev,
                               dma_unmap_addr(txb, mapping),
                               frag->size, PCI_DMA_TODEVICE);
+
+               while (txb->fragmented) {
+                       txb->fragmented = false;
+                       entry = NEXT_TX(entry);
+                       txb = &tnapi->tx_buffers[entry];
+               }
        }
 }
 
 /* Workaround 4GB and 40-bit hardware DMA bugs. */
 static int tigon3_dma_hwbug_workaround(struct tg3_napi *tnapi,
                                       struct sk_buff *skb,
-                                      u32 base_flags, u32 mss)
+                                      u32 *entry, u32 *budget,
+                                      u32 base_flags, u32 mss, u32 vlan)
 {
        struct tg3 *tp = tnapi->tp;
        struct sk_buff *new_skb;
        dma_addr_t new_addr = 0;
-       u32 entry = tnapi->tx_prod;
        int ret = 0;
 
        if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5701)
@@ -5976,24 +6056,22 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi *tnapi,
                                          PCI_DMA_TODEVICE);
                /* Make sure the mapping succeeded */
                if (pci_dma_mapping_error(tp->pdev, new_addr)) {
-                       ret = -1;
                        dev_kfree_skb(new_skb);
-
-               /* Make sure new skb does not cross any 4G boundaries.
-                * Drop the packet if it does.
-                */
-               } else if (tg3_4g_overflow_test(new_addr, new_skb->len)) {
-                       pci_unmap_single(tp->pdev, new_addr, new_skb->len,
-                                        PCI_DMA_TODEVICE);
                        ret = -1;
-                       dev_kfree_skb(new_skb);
                } else {
-                       tnapi->tx_buffers[entry].skb = new_skb;
-                       dma_unmap_addr_set(&tnapi->tx_buffers[entry],
+                       base_flags |= TXD_FLAG_END;
+
+                       tnapi->tx_buffers[*entry].skb = new_skb;
+                       dma_unmap_addr_set(&tnapi->tx_buffers[*entry],
                                           mapping, new_addr);
 
-                       tg3_set_txd(tnapi, entry, new_addr, new_skb->len,
-                                   base_flags, 1 | (mss << 1));
+                       if (tg3_tx_frag_set(tnapi, entry, budget, new_addr,
+                                           new_skb->len, base_flags,
+                                           mss, vlan)) {
+                               tg3_tx_skb_unmap(tnapi, *entry, 0);
+                               dev_kfree_skb(new_skb);
+                               ret = -1;
+                       }
                }
        }
 
@@ -6051,7 +6129,8 @@ tg3_tso_bug_end:
 static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct tg3 *tp = netdev_priv(dev);
-       u32 len, entry, base_flags, mss;
+       u32 len, entry, base_flags, mss, vlan = 0;
+       u32 budget;
        int i = -1, would_hit_hwbug;
        dma_addr_t mapping;
        struct tg3_napi *tnapi;
@@ -6063,12 +6142,14 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
        if (tg3_flag(tp, ENABLE_TSS))
                tnapi++;
 
+       budget = tg3_tx_avail(tnapi);
+
        /* We are running in BH disabled context with netif_tx_lock
         * and TX reclaim runs via tp->napi.poll inside of a software
         * interrupt.  Furthermore, IRQ processing runs lockless so we have
         * no IRQ context deadlocks to worry about either.  Rejoice!
         */
-       if (unlikely(tg3_tx_avail(tnapi) <= (skb_shinfo(skb)->nr_frags + 1))) {
+       if (unlikely(budget <= (skb_shinfo(skb)->nr_frags + 1))) {
                if (!netif_tx_queue_stopped(txq)) {
                        netif_tx_stop_queue(txq);
 
@@ -6153,9 +6234,12 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
                }
        }
 
-       if (vlan_tx_tag_present(skb))
-               base_flags |= (TXD_FLAG_VLAN |
-                              (vlan_tx_tag_get(skb) << 16));
+#ifdef BCM_KERNEL_SUPPORTS_8021Q
+       if (vlan_tx_tag_present(skb)) {
+               base_flags |= TXD_FLAG_VLAN;
+               vlan = vlan_tx_tag_get(skb);
+       }
+#endif
 
        if (tg3_flag(tp, USE_JUMBO_BDFLAG) &&
            !mss && skb->len > VLAN_ETH_FRAME_LEN)
@@ -6174,25 +6258,23 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
        would_hit_hwbug = 0;
 
-       if (tg3_flag(tp, SHORT_DMA_BUG) && len <= 8)
-               would_hit_hwbug = 1;
-
-       if (tg3_4g_overflow_test(mapping, len))
-               would_hit_hwbug = 1;
-
-       if (tg3_40bit_overflow_test(tp, mapping, len))
-               would_hit_hwbug = 1;
-
        if (tg3_flag(tp, 5701_DMA_BUG))
                would_hit_hwbug = 1;
 
-       tg3_set_txd(tnapi, entry, mapping, len, base_flags,
-                   (skb_shinfo(skb)->nr_frags == 0) | (mss << 1));
-
-       entry = NEXT_TX(entry);
+       if (tg3_tx_frag_set(tnapi, &entry, &budget, mapping, len, base_flags |
+                         ((skb_shinfo(skb)->nr_frags == 0) ? TXD_FLAG_END : 0),
+                           mss, vlan))
+               would_hit_hwbug = 1;
 
        /* Now loop through additional data fragments, and queue them. */
        if (skb_shinfo(skb)->nr_frags > 0) {
+               u32 tmp_mss = mss;
+
+               if (!tg3_flag(tp, HW_TSO_1) &&
+                   !tg3_flag(tp, HW_TSO_2) &&
+                   !tg3_flag(tp, HW_TSO_3))
+                       tmp_mss = 0;
+
                last = skb_shinfo(skb)->nr_frags - 1;
                for (i = 0; i <= last; i++) {
                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
@@ -6209,39 +6291,25 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
                        if (pci_dma_mapping_error(tp->pdev, mapping))
                                goto dma_error;
 
-                       if (tg3_flag(tp, SHORT_DMA_BUG) &&
-                           len <= 8)
+                       if (tg3_tx_frag_set(tnapi, &entry, &budget, mapping,
+                                           len, base_flags |
+                                           ((i == last) ? TXD_FLAG_END : 0),
+                                           tmp_mss, vlan))
                                would_hit_hwbug = 1;
-
-                       if (tg3_4g_overflow_test(mapping, len))
-                               would_hit_hwbug = 1;
-
-                       if (tg3_40bit_overflow_test(tp, mapping, len))
-                               would_hit_hwbug = 1;
-
-                       if (tg3_flag(tp, HW_TSO_1) ||
-                           tg3_flag(tp, HW_TSO_2) ||
-                           tg3_flag(tp, HW_TSO_3))
-                               tg3_set_txd(tnapi, entry, mapping, len,
-                                           base_flags, (i == last)|(mss << 1));
-                       else
-                               tg3_set_txd(tnapi, entry, mapping, len,
-                                           base_flags, (i == last));
-
-                       entry = NEXT_TX(entry);
                }
        }
 
        if (would_hit_hwbug) {
-               tg3_skb_error_unmap(tnapi, skb, i);
+               tg3_tx_skb_unmap(tnapi, tnapi->tx_prod, i);
 
                /* If the workaround fails due to memory/mapping
                 * failure, silently drop this packet.
                 */
-               if (tigon3_dma_hwbug_workaround(tnapi, skb, base_flags, mss))
+               entry = tnapi->tx_prod;
+               budget = tg3_tx_avail(tnapi);
+               if (tigon3_dma_hwbug_workaround(tnapi, skb, &entry, &budget,
+                                               base_flags, mss, vlan))
                        goto out_unlock;
-
-               entry = NEXT_TX(tnapi->tx_prod);
        }
 
        skb_tx_timestamp(skb);
@@ -6269,7 +6337,7 @@ out_unlock:
        return NETDEV_TX_OK;
 
 dma_error:
-       tg3_skb_error_unmap(tnapi, skb, i);
+       tg3_tx_skb_unmap(tnapi, tnapi->tx_prod, i);
        dev_kfree_skb(skb);
        tnapi->tx_buffers[tnapi->tx_prod].skb = NULL;
        return NETDEV_TX_OK;
@@ -6602,35 +6670,13 @@ static void tg3_free_rings(struct tg3 *tp)
                if (!tnapi->tx_buffers)
                        continue;
 
-               for (i = 0; i < TG3_TX_RING_SIZE; ) {
-                       struct ring_info *txp;
-                       struct sk_buff *skb;
-                       unsigned int k;
-
-                       txp = &tnapi->tx_buffers[i];
-                       skb = txp->skb;
+               for (i = 0; i < TG3_TX_RING_SIZE; i++) {
+                       struct sk_buff *skb = tnapi->tx_buffers[i].skb;
 
-                       if (skb == NULL) {
-                               i++;
+                       if (!skb)
                                continue;
-                       }
-
-                       pci_unmap_single(tp->pdev,
-                                        dma_unmap_addr(txp, mapping),
-                                        skb_headlen(skb),
-                                        PCI_DMA_TODEVICE);
-                       txp->skb = NULL;
 
-                       i++;
-
-                       for (k = 0; k < skb_shinfo(skb)->nr_frags; k++) {
-                               txp = &tnapi->tx_buffers[i & (TG3_TX_RING_SIZE - 1)];
-                               pci_unmap_page(tp->pdev,
-                                              dma_unmap_addr(txp, mapping),
-                                              skb_shinfo(skb)->frags[k].size,
-                                              PCI_DMA_TODEVICE);
-                               i++;
-                       }
+                       tg3_tx_skb_unmap(tnapi, i, skb_shinfo(skb)->nr_frags);
 
                        dev_kfree_skb_any(skb);
                }
@@ -6762,9 +6808,9 @@ static int tg3_alloc_consistent(struct tg3 *tp)
                 */
                if ((!i && !tg3_flag(tp, ENABLE_TSS)) ||
                    (i && tg3_flag(tp, ENABLE_TSS))) {
-                       tnapi->tx_buffers = kzalloc(sizeof(struct ring_info) *
-                                                   TG3_TX_RING_SIZE,
-                                                   GFP_KERNEL);
+                       tnapi->tx_buffers = kzalloc(
+                                              sizeof(struct tg3_tx_ring_info) *
+                                              TG3_TX_RING_SIZE, GFP_KERNEL);
                        if (!tnapi->tx_buffers)
                                goto err_out;
 
@@ -8360,7 +8406,7 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy)
        /* Program the jumbo buffer descriptor ring control
         * blocks on those devices that have them.
         */
-       if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719 ||
+       if (tp->pci_chip_rev_id == CHIPREV_ID_5719_A0 ||
            (tg3_flag(tp, JUMBO_CAPABLE) && !tg3_flag(tp, 5780_CLASS))) {
 
                if (tg3_flag(tp, JUMBO_RING_ENABLE)) {
@@ -11204,6 +11250,7 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode)
 {
        u32 mac_mode, rx_start_idx, rx_idx, tx_idx, opaque_key;
        u32 base_flags = 0, mss = 0, desc_idx, coal_now, data_off, val;
+       u32 budget;
        struct sk_buff *skb, *rx_skb;
        u8 *tx_data;
        dma_addr_t map;
@@ -11363,6 +11410,10 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode)
                return -EIO;
        }
 
+       val = tnapi->tx_prod;
+       tnapi->tx_buffers[val].skb = skb;
+       dma_unmap_addr_set(&tnapi->tx_buffers[val], mapping, map);
+
        tw32_f(HOSTCC_MODE, tp->coalesce_mode | HOSTCC_MODE_ENABLE |
               rnapi->coal_now);
 
@@ -11370,8 +11421,13 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode)
 
        rx_start_idx = rnapi->hw_status->idx[0].rx_producer;
 
-       tg3_set_txd(tnapi, tnapi->tx_prod, map, tx_len,
-                   base_flags, (mss << 1) | 1);
+       budget = tg3_tx_avail(tnapi);
+       if (tg3_tx_frag_set(tnapi, &val, &budget, map, tx_len,
+                           base_flags | TXD_FLAG_END, mss, 0)) {
+               tnapi->tx_buffers[val].skb = NULL;
+               dev_kfree_skb(skb);
+               return -EIO;
+       }
 
        tnapi->tx_prod++;
 
@@ -11394,7 +11450,7 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode)
                        break;
        }
 
-       pci_unmap_single(tp->pdev, map, tx_len, PCI_DMA_TODEVICE);
+       tg3_tx_skb_unmap(tnapi, tnapi->tx_prod - 1, 0);
        dev_kfree_skb(skb);
 
        if (tx_idx != tnapi->tx_prod)
@@ -13817,7 +13873,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
                tg3_flag_set(tp, 5705_PLUS);
 
        /* Determine TSO capabilities */
-       if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719)
+       if (tp->pci_chip_rev_id == CHIPREV_ID_5719_A0)
                ; /* Do nothing. HW bug. */
        else if (tg3_flag(tp, 57765_PLUS))
                tg3_flag_set(tp, HW_TSO_3);
@@ -13880,11 +13936,14 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
        if (tg3_flag(tp, 5755_PLUS))
                tg3_flag_set(tp, SHORT_DMA_BUG);
 
+       if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719)
+               tg3_flag_set(tp, 4K_FIFO_LIMIT);
+
        if (tg3_flag(tp, 5717_PLUS))
                tg3_flag_set(tp, LRG_PROD_RING_CAP);
 
        if (tg3_flag(tp, 57765_PLUS) &&
-           GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5719)
+           tp->pci_chip_rev_id != CHIPREV_ID_5719_A0)
                tg3_flag_set(tp, USE_JUMBO_BDFLAG);
 
        if (!tg3_flag(tp, 5705_PLUS) ||
index 691539b..2ea456d 100644 (file)
@@ -2652,6 +2652,12 @@ struct ring_info {
        DEFINE_DMA_UNMAP_ADDR(mapping);
 };
 
+struct tg3_tx_ring_info {
+       struct sk_buff                  *skb;
+       DEFINE_DMA_UNMAP_ADDR(mapping);
+       bool                            fragmented;
+};
+
 struct tg3_link_config {
        /* Describes what we're trying to get. */
        u32                             advertising;
@@ -2816,7 +2822,7 @@ struct tg3_napi {
        u32                             last_tx_cons;
        u32                             prodmbox;
        struct tg3_tx_buffer_desc       *tx_ring;
-       struct ring_info                *tx_buffers;
+       struct tg3_tx_ring_info         *tx_buffers;
 
        dma_addr_t                      status_mapping;
        dma_addr_t                      rx_rcb_mapping;
@@ -2899,6 +2905,7 @@ enum TG3_FLAGS {
        TG3_FLAG_57765_PLUS,
        TG3_FLAG_APE_HAS_NCSI,
        TG3_FLAG_5717_PLUS,
+       TG3_FLAG_4K_FIFO_LIMIT,
 
        /* Add new flags before this comment and TG3_FLAG_NUMBER_OF_FLAGS */
        TG3_FLAG_NUMBER_OF_FLAGS,       /* Last entry in enum TG3_FLAGS */
index 9a6b382..71f3d1a 100644 (file)
@@ -528,6 +528,7 @@ static void tun_net_init(struct net_device *dev)
                dev->netdev_ops = &tap_netdev_ops;
                /* Ethernet TAP Device */
                ether_setup(dev);
+               dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 
                random_ether_addr(dev->dev_addr);
 
index 5250288..c5c4b4d 100644 (file)
@@ -314,12 +314,11 @@ static int asix_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
        skb_pull(skb, 4);
 
        while (skb->len > 0) {
-               if ((short)(header & 0x0000ffff) !=
-                   ~((short)((header & 0xffff0000) >> 16))) {
+               if ((header & 0x07ff) != ((~header >> 16) & 0x07ff))
                        netdev_err(dev->net, "asix_rx_fixup() Bad Header Length\n");
-               }
+
                /* get the packet length */
-               size = (u16) (header & 0x0000ffff);
+               size = (u16) (header & 0x000007ff);
 
                if ((skb->len) - ((size + 1) & 0xfffe) == 0) {
                        u8 alignment = (unsigned long)skb->data & 0x3;
index 7f78db7..5b23767 100644 (file)
@@ -263,6 +263,8 @@ static void veth_setup(struct net_device *dev)
 {
        ether_setup(dev);
 
+       dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+
        dev->netdev_ops = &veth_netdev_ops;
        dev->ethtool_ops = &veth_ethtool_ops;
        dev->features |= NETIF_F_LLTX;
index b25c922..eb20281 100644 (file)
@@ -1074,9 +1074,10 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type)
 
        used = pvc_is_used(pvc);
 
-       if (type == ARPHRD_ETHER)
+       if (type == ARPHRD_ETHER) {
                dev = alloc_netdev(0, "pvceth%d", ether_setup);
-       else
+               dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+       } else
                dev = alloc_netdev(0, "pvc%d", pvc_setup);
 
        if (!dev) {
index 55cf71f..e1b3e3c 100644 (file)
@@ -2823,6 +2823,7 @@ static struct net_device *_init_airo_card( unsigned short irq, int port,
        dev->wireless_data = &ai->wireless_data;
        dev->irq = irq;
        dev->base_addr = port;
+       dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 
        SET_NETDEV_DEV(dev, dmdev);
 
index d2293dc..3cab843 100644 (file)
@@ -28,7 +28,7 @@ config B43
 
 config B43_BCMA
        bool "Support for BCMA bus"
-       depends on B43 && BCMA && BROKEN
+       depends on B43 && BCMA
        default y
 
 config B43_SSB
index 64c3f65..05f6c7b 100644 (file)
@@ -244,10 +244,12 @@ void b43_bus_set_wldev(struct b43_bus_dev *dev, void *wldev)
 #ifdef CONFIG_B43_BCMA
        case B43_BUS_BCMA:
                bcma_set_drvdata(dev->bdev, wldev);
+               break;
 #endif
 #ifdef CONFIG_B43_SSB
        case B43_BUS_SSB:
                ssb_set_drvdata(dev->sdev, wldev);
+               break;
 #endif
        }
 }
index 032d466..26f1ab8 100644 (file)
@@ -5350,6 +5350,7 @@ static void b43_ssb_remove(struct ssb_device *sdev)
 {
        struct b43_wl *wl = ssb_get_devtypedata(sdev);
        struct b43_wldev *wldev = ssb_get_drvdata(sdev);
+       struct b43_bus_dev *dev = wldev->dev;
 
        /* We must cancel any work here before unregistering from ieee80211,
         * as the ieee80211 unreg will destroy the workqueue. */
@@ -5365,14 +5366,14 @@ static void b43_ssb_remove(struct ssb_device *sdev)
                ieee80211_unregister_hw(wl->hw);
        }
 
-       b43_one_core_detach(wldev->dev);
+       b43_one_core_detach(dev);
 
        if (list_empty(&wl->devlist)) {
                b43_leds_unregister(wl);
                /* Last core on the chip unregistered.
                 * We can destroy common struct b43_wl.
                 */
-               b43_wireless_exit(wldev->dev, wl);
+               b43_wireless_exit(dev, wl);
        }
 }
 
index d508482..89a116f 100644 (file)
@@ -855,6 +855,7 @@ void hostap_setup_dev(struct net_device *dev, local_info_t *local,
 
        iface = netdev_priv(dev);
        ether_setup(dev);
+       dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 
        /* kernel callbacks */
        if (iface) {
index 0372315..c77e054 100644 (file)
@@ -1596,7 +1596,7 @@ static void pn533_disconnect(struct usb_interface *interface)
        usb_free_urb(dev->out_urb);
        kfree(dev);
 
-       nfc_dev_info(&dev->interface->dev, "NXP PN533 NFC device disconnected");
+       nfc_dev_info(&interface->dev, "NXP PN533 NFC device disconnected");
 }
 
 static struct usb_driver pn533_driver = {
index 77cb2a1..81525ae 100644 (file)
@@ -55,7 +55,7 @@ enum smbios_attr_enum {
        SMBIOS_ATTR_INSTANCE_SHOW,
 };
 
-static mode_t
+static size_t
 find_smbios_instance_string(struct pci_dev *pdev, char *buf,
                            enum smbios_attr_enum attribute)
 {
index 081c171..5ce5170 100644 (file)
@@ -397,7 +397,7 @@ struct amap_pdu_data_out {
 };
 
 struct be_cmd_bhs {
-       struct iscsi_cmd iscsi_hdr;
+       struct iscsi_scsi_req iscsi_hdr;
        unsigned char pad1[16];
        struct pdu_data_out iscsi_data_pdu;
        unsigned char pad2[BE_SENSE_INFO_SIZE -
@@ -428,7 +428,7 @@ struct be_nonio_bhs {
 };
 
 struct be_status_bhs {
-       struct iscsi_cmd iscsi_hdr;
+       struct iscsi_scsi_req iscsi_hdr;
        unsigned char pad1[16];
        /**
         * The plus 2 below is to hold the sense info length that gets
index 030a96c..9ae80cd 100644 (file)
@@ -332,11 +332,11 @@ int bnx2i_send_iscsi_login(struct bnx2i_conn *bnx2i_conn,
 {
        struct bnx2i_cmd *bnx2i_cmd;
        struct bnx2i_login_request *login_wqe;
-       struct iscsi_login *login_hdr;
+       struct iscsi_login_req *login_hdr;
        u32 dword;
 
        bnx2i_cmd = (struct bnx2i_cmd *)task->dd_data;
-       login_hdr = (struct iscsi_login *)task->hdr;
+       login_hdr = (struct iscsi_login_req *)task->hdr;
        login_wqe = (struct bnx2i_login_request *)
                                                bnx2i_conn->ep->qp.sq_prod_qe;
 
@@ -1349,7 +1349,7 @@ int bnx2i_process_scsi_cmd_resp(struct iscsi_session *session,
        struct bnx2i_cmd_response *resp_cqe;
        struct bnx2i_cmd *bnx2i_cmd;
        struct iscsi_task *task;
-       struct iscsi_cmd_rsp *hdr;
+       struct iscsi_scsi_rsp *hdr;
        u32 datalen = 0;
 
        resp_cqe = (struct bnx2i_cmd_response *)cqe;
@@ -1376,7 +1376,7 @@ int bnx2i_process_scsi_cmd_resp(struct iscsi_session *session,
        }
        bnx2i_iscsi_unmap_sg_list(bnx2i_cmd);
 
-       hdr = (struct iscsi_cmd_rsp *)task->hdr;
+       hdr = (struct iscsi_scsi_rsp *)task->hdr;
        resp_cqe = (struct bnx2i_cmd_response *)cqe;
        hdr->opcode = resp_cqe->op_code;
        hdr->max_cmdsn = cpu_to_be32(resp_cqe->max_cmd_sn);
index 5c55a75..cffd4d7 100644 (file)
@@ -1213,7 +1213,7 @@ static int bnx2i_task_xmit(struct iscsi_task *task)
        struct bnx2i_conn *bnx2i_conn = conn->dd_data;
        struct scsi_cmnd *sc = task->sc;
        struct bnx2i_cmd *cmd = task->dd_data;
-       struct iscsi_cmd *hdr = (struct iscsi_cmd *) task->hdr;
+       struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
 
        if (atomic_read(&bnx2i_conn->ep->num_active_cmds) + 1  >
            hba->max_sqes)
index d7a4120..256a999 100644 (file)
@@ -84,22 +84,6 @@ MODULE_PARM_DESC(debug_libiscsi_eh,
                                             __func__, ##arg);          \
        } while (0);
 
-/* Serial Number Arithmetic, 32 bits, less than, RFC1982 */
-#define SNA32_CHECK 2147483648UL
-
-static int iscsi_sna_lt(u32 n1, u32 n2)
-{
-       return n1 != n2 && ((n1 < n2 && (n2 - n1 < SNA32_CHECK)) ||
-                           (n1 > n2 && (n2 - n1 < SNA32_CHECK)));
-}
-
-/* Serial Number Arithmetic, 32 bits, less than, RFC1982 */
-static int iscsi_sna_lte(u32 n1, u32 n2)
-{
-       return n1 == n2 || ((n1 < n2 && (n2 - n1 < SNA32_CHECK)) ||
-                           (n1 > n2 && (n2 - n1 < SNA32_CHECK)));
-}
-
 inline void iscsi_conn_queue_work(struct iscsi_conn *conn)
 {
        struct Scsi_Host *shost = conn->session->host;
@@ -360,7 +344,7 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
        struct iscsi_conn *conn = task->conn;
        struct iscsi_session *session = conn->session;
        struct scsi_cmnd *sc = task->sc;
-       struct iscsi_cmd *hdr;
+       struct iscsi_scsi_req *hdr;
        unsigned hdrlength, cmd_len;
        itt_t itt;
        int rc;
@@ -374,7 +358,7 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
                if (rc)
                        return rc;
        }
-       hdr = (struct iscsi_cmd *) task->hdr;
+       hdr = (struct iscsi_scsi_req *)task->hdr;
        itt = hdr->itt;
        memset(hdr, 0, sizeof(*hdr));
 
@@ -830,7 +814,7 @@ static void iscsi_scsi_cmd_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
                               struct iscsi_task *task, char *data,
                               int datalen)
 {
-       struct iscsi_cmd_rsp *rhdr = (struct iscsi_cmd_rsp *)hdr;
+       struct iscsi_scsi_rsp *rhdr = (struct iscsi_scsi_rsp *)hdr;
        struct iscsi_session *session = conn->session;
        struct scsi_cmnd *sc = task->sc;
 
index 499b7a9..32ee39a 100644 (file)
@@ -6205,6 +6205,7 @@ int ar6000_create_ap_interface(struct ar6_softc *ar, char *ap_ifname)
     
     ether_setup(dev);
     init_netdev(dev, ap_ifname);
+    dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 
     if (register_netdev(dev)) {
         AR_DEBUG_PRINTF(ATH_DEBUG_ERR,("ar6000_create_ap_interface: register_netdev failed\n"));
index 5711e7c..40e3d37 100644 (file)
@@ -24,8 +24,6 @@
 #define BRCMS_SET_SHORTSLOT_OVERRIDE           146
 
 
-#include <linux/interrupt.h>
-
 /* BMAC Note: High-only driver is no longer working in softirq context as it needs to block and
  * sleep so perimeter lock has to be a semaphore instead of spinlock. This requires timers to be
  * submitted to workqueue instead of being on kernel timer
index 5cb0f0e..b28794b 100644 (file)
@@ -31,5 +31,6 @@ config TCM_PSCSI
 
 source "drivers/target/loopback/Kconfig"
 source "drivers/target/tcm_fc/Kconfig"
+source "drivers/target/iscsi/Kconfig"
 
 endif
index 21df808..1060c7b 100644 (file)
@@ -24,5 +24,5 @@ obj-$(CONFIG_TCM_PSCSI)               += target_core_pscsi.o
 
 # Fabric modules
 obj-$(CONFIG_LOOPBACK_TARGET)  += loopback/
-
 obj-$(CONFIG_TCM_FC)           += tcm_fc/
+obj-$(CONFIG_ISCSI_TARGET)     += iscsi/
diff --git a/drivers/target/iscsi/Kconfig b/drivers/target/iscsi/Kconfig
new file mode 100644 (file)
index 0000000..564ff4e
--- /dev/null
@@ -0,0 +1,8 @@
+config ISCSI_TARGET
+       tristate "Linux-iSCSI.org iSCSI Target Mode Stack"
+       select CRYPTO
+       select CRYPTO_CRC32C
+       select CRYPTO_CRC32C_INTEL if X86
+       help
+       Say M here to enable the ConfigFS enabled Linux-iSCSI.org iSCSI
+       Target Mode Stack.
diff --git a/drivers/target/iscsi/Makefile b/drivers/target/iscsi/Makefile
new file mode 100644 (file)
index 0000000..5b9a2cf
--- /dev/null
@@ -0,0 +1,20 @@
+iscsi_target_mod-y +=          iscsi_target_parameters.o \
+                               iscsi_target_seq_pdu_list.o \
+                               iscsi_target_tq.o \
+                               iscsi_target_auth.o \
+                               iscsi_target_datain_values.o \
+                               iscsi_target_device.o \
+                               iscsi_target_erl0.o \
+                               iscsi_target_erl1.o \
+                               iscsi_target_erl2.o \
+                               iscsi_target_login.o \
+                               iscsi_target_nego.o \
+                               iscsi_target_nodeattrib.o \
+                               iscsi_target_tmr.o \
+                               iscsi_target_tpg.o \
+                               iscsi_target_util.o \
+                               iscsi_target.o \
+                               iscsi_target_configfs.o \
+                               iscsi_target_stat.o
+
+obj-$(CONFIG_ISCSI_TARGET)     += iscsi_target_mod.o
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
new file mode 100644 (file)
index 0000000..14c81c4
--- /dev/null
@@ -0,0 +1,4559 @@
+/*******************************************************************************
+ * This file contains main functions related to the iSCSI Target Core Driver.
+ *
+ * \u00a9 Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/string.h>
+#include <linux/kthread.h>
+#include <linux/crypto.h>
+#include <linux/completion.h>
+#include <asm/unaligned.h>
+#include <scsi/scsi_device.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_tmr.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_parameters.h"
+#include "iscsi_target_seq_pdu_list.h"
+#include "iscsi_target_tq.h"
+#include "iscsi_target_configfs.h"
+#include "iscsi_target_datain_values.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_erl2.h"
+#include "iscsi_target_login.h"
+#include "iscsi_target_tmr.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_stat.h"
+
+static LIST_HEAD(g_tiqn_list);
+static LIST_HEAD(g_np_list);
+static DEFINE_SPINLOCK(tiqn_lock);
+static DEFINE_SPINLOCK(np_lock);
+
+static struct idr tiqn_idr;
+struct idr sess_idr;
+struct mutex auth_id_lock;
+spinlock_t sess_idr_lock;
+
+struct iscsit_global *iscsit_global;
+
+struct kmem_cache *lio_cmd_cache;
+struct kmem_cache *lio_qr_cache;
+struct kmem_cache *lio_dr_cache;
+struct kmem_cache *lio_ooo_cache;
+struct kmem_cache *lio_r2t_cache;
+
+static int iscsit_handle_immediate_data(struct iscsi_cmd *,
+                       unsigned char *buf, u32);
+static int iscsit_logout_post_handler(struct iscsi_cmd *, struct iscsi_conn *);
+
+struct iscsi_tiqn *iscsit_get_tiqn_for_login(unsigned char *buf)
+{
+       struct iscsi_tiqn *tiqn = NULL;
+
+       spin_lock(&tiqn_lock);
+       list_for_each_entry(tiqn, &g_tiqn_list, tiqn_list) {
+               if (!strcmp(tiqn->tiqn, buf)) {
+
+                       spin_lock(&tiqn->tiqn_state_lock);
+                       if (tiqn->tiqn_state == TIQN_STATE_ACTIVE) {
+                               tiqn->tiqn_access_count++;
+                               spin_unlock(&tiqn->tiqn_state_lock);
+                               spin_unlock(&tiqn_lock);
+                               return tiqn;
+                       }
+                       spin_unlock(&tiqn->tiqn_state_lock);
+               }
+       }
+       spin_unlock(&tiqn_lock);
+
+       return NULL;
+}
+
+static int iscsit_set_tiqn_shutdown(struct iscsi_tiqn *tiqn)
+{
+       spin_lock(&tiqn->tiqn_state_lock);
+       if (tiqn->tiqn_state == TIQN_STATE_ACTIVE) {
+               tiqn->tiqn_state = TIQN_STATE_SHUTDOWN;
+               spin_unlock(&tiqn->tiqn_state_lock);
+               return 0;
+       }
+       spin_unlock(&tiqn->tiqn_state_lock);
+
+       return -1;
+}
+
+void iscsit_put_tiqn_for_login(struct iscsi_tiqn *tiqn)
+{
+       spin_lock(&tiqn->tiqn_state_lock);
+       tiqn->tiqn_access_count--;
+       spin_unlock(&tiqn->tiqn_state_lock);
+}
+
+/*
+ * Note that IQN formatting is expected to be done in userspace, and
+ * no explict IQN format checks are done here.
+ */
+struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *buf)
+{
+       struct iscsi_tiqn *tiqn = NULL;
+       int ret;
+
+       if (strlen(buf) > ISCSI_IQN_LEN) {
+               pr_err("Target IQN exceeds %d bytes\n",
+                               ISCSI_IQN_LEN);
+               return ERR_PTR(-EINVAL);
+       }
+
+       tiqn = kzalloc(sizeof(struct iscsi_tiqn), GFP_KERNEL);
+       if (!tiqn) {
+               pr_err("Unable to allocate struct iscsi_tiqn\n");
+               return ERR_PTR(-ENOMEM);
+       }
+
+       sprintf(tiqn->tiqn, "%s", buf);
+       INIT_LIST_HEAD(&tiqn->tiqn_list);
+       INIT_LIST_HEAD(&tiqn->tiqn_tpg_list);
+       spin_lock_init(&tiqn->tiqn_state_lock);
+       spin_lock_init(&tiqn->tiqn_tpg_lock);
+       spin_lock_init(&tiqn->sess_err_stats.lock);
+       spin_lock_init(&tiqn->login_stats.lock);
+       spin_lock_init(&tiqn->logout_stats.lock);
+
+       if (!idr_pre_get(&tiqn_idr, GFP_KERNEL)) {
+               pr_err("idr_pre_get() for tiqn_idr failed\n");
+               kfree(tiqn);
+               return ERR_PTR(-ENOMEM);
+       }
+       tiqn->tiqn_state = TIQN_STATE_ACTIVE;
+
+       spin_lock(&tiqn_lock);
+       ret = idr_get_new(&tiqn_idr, NULL, &tiqn->tiqn_index);
+       if (ret < 0) {
+               pr_err("idr_get_new() failed for tiqn->tiqn_index\n");
+               spin_unlock(&tiqn_lock);
+               kfree(tiqn);
+               return ERR_PTR(ret);
+       }
+       list_add_tail(&tiqn->tiqn_list, &g_tiqn_list);
+       spin_unlock(&tiqn_lock);
+
+       pr_debug("CORE[0] - Added iSCSI Target IQN: %s\n", tiqn->tiqn);
+
+       return tiqn;
+
+}
+
+static void iscsit_wait_for_tiqn(struct iscsi_tiqn *tiqn)
+{
+       /*
+        * Wait for accesses to said struct iscsi_tiqn to end.
+        */
+       spin_lock(&tiqn->tiqn_state_lock);
+       while (tiqn->tiqn_access_count != 0) {
+               spin_unlock(&tiqn->tiqn_state_lock);
+               msleep(10);
+               spin_lock(&tiqn->tiqn_state_lock);
+       }
+       spin_unlock(&tiqn->tiqn_state_lock);
+}
+
+void iscsit_del_tiqn(struct iscsi_tiqn *tiqn)
+{
+       /*
+        * iscsit_set_tiqn_shutdown sets tiqn->tiqn_state = TIQN_STATE_SHUTDOWN
+        * while holding tiqn->tiqn_state_lock.  This means that all subsequent
+        * attempts to access this struct iscsi_tiqn will fail from both transport
+        * fabric and control code paths.
+        */
+       if (iscsit_set_tiqn_shutdown(tiqn) < 0) {
+               pr_err("iscsit_set_tiqn_shutdown() failed\n");
+               return;
+       }
+
+       iscsit_wait_for_tiqn(tiqn);
+
+       spin_lock(&tiqn_lock);
+       list_del(&tiqn->tiqn_list);
+       idr_remove(&tiqn_idr, tiqn->tiqn_index);
+       spin_unlock(&tiqn_lock);
+
+       pr_debug("CORE[0] - Deleted iSCSI Target IQN: %s\n",
+                       tiqn->tiqn);
+       kfree(tiqn);
+}
+
+int iscsit_access_np(struct iscsi_np *np, struct iscsi_portal_group *tpg)
+{
+       int ret;
+       /*
+        * Determine if the network portal is accepting storage traffic.
+        */
+       spin_lock_bh(&np->np_thread_lock);
+       if (np->np_thread_state != ISCSI_NP_THREAD_ACTIVE) {
+               spin_unlock_bh(&np->np_thread_lock);
+               return -1;
+       }
+       if (np->np_login_tpg) {
+               pr_err("np->np_login_tpg() is not NULL!\n");
+               spin_unlock_bh(&np->np_thread_lock);
+               return -1;
+       }
+       spin_unlock_bh(&np->np_thread_lock);
+       /*
+        * Determine if the portal group is accepting storage traffic.
+        */
+       spin_lock_bh(&tpg->tpg_state_lock);
+       if (tpg->tpg_state != TPG_STATE_ACTIVE) {
+               spin_unlock_bh(&tpg->tpg_state_lock);
+               return -1;
+       }
+       spin_unlock_bh(&tpg->tpg_state_lock);
+
+       /*
+        * Here we serialize access across the TIQN+TPG Tuple.
+        */
+       ret = mutex_lock_interruptible(&tpg->np_login_lock);
+       if ((ret != 0) || signal_pending(current))
+               return -1;
+
+       spin_lock_bh(&np->np_thread_lock);
+       np->np_login_tpg = tpg;
+       spin_unlock_bh(&np->np_thread_lock);
+
+       return 0;
+}
+
+int iscsit_deaccess_np(struct iscsi_np *np, struct iscsi_portal_group *tpg)
+{
+       struct iscsi_tiqn *tiqn = tpg->tpg_tiqn;
+
+       spin_lock_bh(&np->np_thread_lock);
+       np->np_login_tpg = NULL;
+       spin_unlock_bh(&np->np_thread_lock);
+
+       mutex_unlock(&tpg->np_login_lock);
+
+       if (tiqn)
+               iscsit_put_tiqn_for_login(tiqn);
+
+       return 0;
+}
+
+static struct iscsi_np *iscsit_get_np(
+       struct __kernel_sockaddr_storage *sockaddr,
+       int network_transport)
+{
+       struct sockaddr_in *sock_in, *sock_in_e;
+       struct sockaddr_in6 *sock_in6, *sock_in6_e;
+       struct iscsi_np *np;
+       int ip_match = 0;
+       u16 port;
+
+       spin_lock_bh(&np_lock);
+       list_for_each_entry(np, &g_np_list, np_list) {
+               spin_lock(&np->np_thread_lock);
+               if (np->np_thread_state != ISCSI_NP_THREAD_ACTIVE) {
+                       spin_unlock(&np->np_thread_lock);
+                       continue;
+               }
+
+               if (sockaddr->ss_family == AF_INET6) {
+                       sock_in6 = (struct sockaddr_in6 *)sockaddr;
+                       sock_in6_e = (struct sockaddr_in6 *)&np->np_sockaddr;
+
+                       if (!memcmp((void *)&sock_in6->sin6_addr.in6_u,
+                                   (void *)&sock_in6_e->sin6_addr.in6_u,
+                                   sizeof(struct in6_addr)))
+                               ip_match = 1;
+
+                       port = ntohs(sock_in6->sin6_port);
+               } else {
+                       sock_in = (struct sockaddr_in *)sockaddr;
+                       sock_in_e = (struct sockaddr_in *)&np->np_sockaddr;
+
+                       if (sock_in->sin_addr.s_addr ==
+                           sock_in_e->sin_addr.s_addr)
+                               ip_match = 1;
+
+                       port = ntohs(sock_in->sin_port);
+               }
+
+               if ((ip_match == 1) && (np->np_port == port) &&
+                   (np->np_network_transport == network_transport)) {
+                       /*
+                        * Increment the np_exports reference count now to
+                        * prevent iscsit_del_np() below from being called
+                        * while iscsi_tpg_add_network_portal() is called.
+                        */
+                       np->np_exports++;
+                       spin_unlock(&np->np_thread_lock);
+                       spin_unlock_bh(&np_lock);
+                       return np;
+               }
+               spin_unlock(&np->np_thread_lock);
+       }
+       spin_unlock_bh(&np_lock);
+
+       return NULL;
+}
+
+struct iscsi_np *iscsit_add_np(
+       struct __kernel_sockaddr_storage *sockaddr,
+       char *ip_str,
+       int network_transport)
+{
+       struct sockaddr_in *sock_in;
+       struct sockaddr_in6 *sock_in6;
+       struct iscsi_np *np;
+       int ret;
+       /*
+        * Locate the existing struct iscsi_np if already active..
+        */
+       np = iscsit_get_np(sockaddr, network_transport);
+       if (np)
+               return np;
+
+       np = kzalloc(sizeof(struct iscsi_np), GFP_KERNEL);
+       if (!np) {
+               pr_err("Unable to allocate memory for struct iscsi_np\n");
+               return ERR_PTR(-ENOMEM);
+       }
+
+       np->np_flags |= NPF_IP_NETWORK;
+       if (sockaddr->ss_family == AF_INET6) {
+               sock_in6 = (struct sockaddr_in6 *)sockaddr;
+               snprintf(np->np_ip, IPV6_ADDRESS_SPACE, "%s", ip_str);
+               np->np_port = ntohs(sock_in6->sin6_port);
+       } else {
+               sock_in = (struct sockaddr_in *)sockaddr;
+               sprintf(np->np_ip, "%s", ip_str);
+               np->np_port = ntohs(sock_in->sin_port);
+       }
+
+       np->np_network_transport = network_transport;
+       spin_lock_init(&np->np_thread_lock);
+       init_completion(&np->np_restart_comp);
+       INIT_LIST_HEAD(&np->np_list);
+
+       ret = iscsi_target_setup_login_socket(np, sockaddr);
+       if (ret != 0) {
+               kfree(np);
+               return ERR_PTR(ret);
+       }
+
+       np->np_thread = kthread_run(iscsi_target_login_thread, np, "iscsi_np");
+       if (IS_ERR(np->np_thread)) {
+               pr_err("Unable to create kthread: iscsi_np\n");
+               ret = PTR_ERR(np->np_thread);
+               kfree(np);
+               return ERR_PTR(ret);
+       }
+       /*
+        * Increment the np_exports reference count now to prevent
+        * iscsit_del_np() below from being run while a new call to
+        * iscsi_tpg_add_network_portal() for a matching iscsi_np is
+        * active.  We don't need to hold np->np_thread_lock at this
+        * point because iscsi_np has not been added to g_np_list yet.
+        */
+       np->np_exports = 1;
+
+       spin_lock_bh(&np_lock);
+       list_add_tail(&np->np_list, &g_np_list);
+       spin_unlock_bh(&np_lock);
+
+       pr_debug("CORE[0] - Added Network Portal: %s:%hu on %s\n",
+               np->np_ip, np->np_port, (np->np_network_transport == ISCSI_TCP) ?
+               "TCP" : "SCTP");
+
+       return np;
+}
+
+/*
+ * Force the login thread for @np to restart, waiting until the restart has
+ * completed.  Always returns 0.
+ *
+ * When both @tpg and @tpg_np are supplied, the reset is only performed if
+ * the portal currently has a login in progress against @tpg; otherwise the
+ * call is a no-op.  Passing tpg/tpg_np as NULL forces the reset
+ * unconditionally (subject to the thread being active).
+ */
+int iscsit_reset_np_thread(
+       struct iscsi_np *np,
+       struct iscsi_tpg_np *tpg_np,
+       struct iscsi_portal_group *tpg)
+{
+       spin_lock_bh(&np->np_thread_lock);
+       if (tpg && tpg_np) {
+               /*
+                * The reset operation need only be performed when the
+                * passed struct iscsi_portal_group has a login in progress
+                * to one of the network portals.
+                */
+               if (tpg_np->tpg_np->np_login_tpg != tpg) {
+                       spin_unlock_bh(&np->np_thread_lock);
+                       return 0;
+               }
+       }
+       /* Nothing to restart if the np thread is not currently running. */
+       if (np->np_thread_state == ISCSI_NP_THREAD_INACTIVE) {
+               spin_unlock_bh(&np->np_thread_lock);
+               return 0;
+       }
+       np->np_thread_state = ISCSI_NP_THREAD_RESET;
+
+       if (np->np_thread) {
+               /*
+                * Drop the lock while signalling: the thread observes
+                * ISCSI_NP_THREAD_RESET, restarts, and completes
+                * np_restart_comp, which we wait on here.
+                */
+               spin_unlock_bh(&np->np_thread_lock);
+               send_sig(SIGINT, np->np_thread, 1);
+               wait_for_completion(&np->np_restart_comp);
+               spin_lock_bh(&np->np_thread_lock);
+       }
+       spin_unlock_bh(&np->np_thread_lock);
+
+       return 0;
+}
+
+/*
+ * Release the listening socket of a network portal, if one exists.
+ *
+ * Some network transports (SCTP) allocate their own struct sock->file;
+ * that extra allocation is freed before the socket itself is released.
+ * Always returns 0.
+ */
+int iscsit_del_np_comm(struct iscsi_np *np)
+{
+       struct socket *sock = np->np_socket;
+
+       if (sock) {
+               if (np->np_flags & NPF_SCTP_STRUCT_FILE) {
+                       kfree(sock->file);
+                       sock->file = NULL;
+               }
+               sock_release(sock);
+       }
+       return 0;
+}
+
+/*
+ * Drop one export reference on @np and, when the last reference goes away,
+ * shut down its login thread, release its socket, unlink it from the
+ * global g_np_list, and free it.  Always returns 0.
+ */
+int iscsit_del_np(struct iscsi_np *np)
+{
+       spin_lock_bh(&np->np_thread_lock);
+       np->np_exports--;
+       if (np->np_exports) {
+               /* Other TPGs still export this portal; keep it alive. */
+               spin_unlock_bh(&np->np_thread_lock);
+               return 0;
+       }
+       np->np_thread_state = ISCSI_NP_THREAD_SHUTDOWN;
+       spin_unlock_bh(&np->np_thread_lock);
+
+       if (np->np_thread) {
+               /*
+                * We need to send the signal to wakeup Linux/Net
+                * which may be sleeping in sock_accept()..
+                */
+               send_sig(SIGINT, np->np_thread, 1);
+               kthread_stop(np->np_thread);
+       }
+       /* Thread is stopped, so the socket can be torn down safely now. */
+       iscsit_del_np_comm(np);
+
+       spin_lock_bh(&np_lock);
+       list_del(&np->np_list);
+       spin_unlock_bh(&np_lock);
+
+       pr_debug("CORE[0] - Removed Network Portal: %s:%hu on %s\n",
+               np->np_ip, np->np_port, (np->np_network_transport == ISCSI_TCP) ?
+               "TCP" : "SCTP");
+
+       kfree(np);
+       return 0;
+}
+
+/*
+ * Module entry point: allocate iscsit_global state, register the configfs
+ * fabric, bring up the RX/TX thread sets, create the kmem caches used for
+ * per-command state, and load the discovery TPG.
+ *
+ * On failure, everything set up so far is unwound in reverse order through
+ * the chained error labels.  Returns 0 on success or a negative errno.
+ */
+static int __init iscsi_target_init_module(void)
+{
+       int ret = 0;
+
+       pr_debug("iSCSI-Target "ISCSIT_VERSION"\n");
+
+       iscsit_global = kzalloc(sizeof(struct iscsit_global), GFP_KERNEL);
+       if (!iscsit_global) {
+               pr_err("Unable to allocate memory for iscsit_global\n");
+               /* Return a real errno: bare -1 would read as -EPERM. */
+               return -ENOMEM;
+       }
+       mutex_init(&auth_id_lock);
+       spin_lock_init(&sess_idr_lock);
+       idr_init(&tiqn_idr);
+       idr_init(&sess_idr);
+
+       ret = iscsi_target_register_configfs();
+       if (ret < 0)
+               goto out;
+
+       ret = iscsi_thread_set_init();
+       if (ret < 0)
+               goto configfs_out;
+
+       if (iscsi_allocate_thread_sets(TARGET_THREAD_SET_COUNT) !=
+                       TARGET_THREAD_SET_COUNT) {
+               pr_err("iscsi_allocate_thread_sets() returned"
+                       " unexpected value!\n");
+               goto ts_out1;
+       }
+
+       lio_cmd_cache = kmem_cache_create("lio_cmd_cache",
+                       sizeof(struct iscsi_cmd), __alignof__(struct iscsi_cmd),
+                       0, NULL);
+       if (!lio_cmd_cache) {
+               pr_err("Unable to kmem_cache_create() for"
+                               " lio_cmd_cache\n");
+               goto ts_out2;
+       }
+
+       lio_qr_cache = kmem_cache_create("lio_qr_cache",
+                       sizeof(struct iscsi_queue_req),
+                       __alignof__(struct iscsi_queue_req), 0, NULL);
+       if (!lio_qr_cache) {
+               pr_err("Unable to kmem_cache_create() for"
+                               " lio_qr_cache\n");
+               goto cmd_out;
+       }
+
+       lio_dr_cache = kmem_cache_create("lio_dr_cache",
+                       sizeof(struct iscsi_datain_req),
+                       __alignof__(struct iscsi_datain_req), 0, NULL);
+       if (!lio_dr_cache) {
+               pr_err("Unable to kmem_cache_create() for"
+                               " lio_dr_cache\n");
+               goto qr_out;
+       }
+
+       lio_ooo_cache = kmem_cache_create("lio_ooo_cache",
+                       sizeof(struct iscsi_ooo_cmdsn),
+                       __alignof__(struct iscsi_ooo_cmdsn), 0, NULL);
+       if (!lio_ooo_cache) {
+               pr_err("Unable to kmem_cache_create() for"
+                               " lio_ooo_cache\n");
+               goto dr_out;
+       }
+
+       lio_r2t_cache = kmem_cache_create("lio_r2t_cache",
+                       sizeof(struct iscsi_r2t), __alignof__(struct iscsi_r2t),
+                       0, NULL);
+       if (!lio_r2t_cache) {
+               pr_err("Unable to kmem_cache_create() for"
+                               " lio_r2t_cache\n");
+               goto ooo_out;
+       }
+
+       if (iscsit_load_discovery_tpg() < 0)
+               goto r2t_out;
+
+       return ret;
+r2t_out:
+       kmem_cache_destroy(lio_r2t_cache);
+ooo_out:
+       kmem_cache_destroy(lio_ooo_cache);
+dr_out:
+       kmem_cache_destroy(lio_dr_cache);
+qr_out:
+       kmem_cache_destroy(lio_qr_cache);
+cmd_out:
+       kmem_cache_destroy(lio_cmd_cache);
+ts_out2:
+       iscsi_deallocate_thread_sets();
+ts_out1:
+       iscsi_thread_set_free();
+configfs_out:
+       iscsi_target_deregister_configfs();
+out:
+       kfree(iscsit_global);
+       return -ENOMEM;
+}
+
+/*
+ * Module exit path: tear down in the reverse order of
+ * iscsi_target_init_module() -- thread sets first, then the discovery
+ * TPG, the kmem caches, the configfs fabric, and finally iscsit_global.
+ */
+static void __exit iscsi_target_cleanup_module(void)
+{
+       iscsi_deallocate_thread_sets();
+       iscsi_thread_set_free();
+       iscsit_release_discovery_tpg();
+       kmem_cache_destroy(lio_cmd_cache);
+       kmem_cache_destroy(lio_qr_cache);
+       kmem_cache_destroy(lio_dr_cache);
+       kmem_cache_destroy(lio_ooo_cache);
+       kmem_cache_destroy(lio_r2t_cache);
+
+       iscsi_target_deregister_configfs();
+
+       kfree(iscsit_global);
+}
+
+/*
+ * Allocate a new command carrying a Reject PDU (echoing the offending PDU
+ * header from @buf), queue it on the connection's response queue, and wait
+ * for it to be sent.
+ *
+ * Returns 0 when the reject was delivered and @fail_conn is 0; returns -1
+ * on allocation failure, on interrupted wait, or when @fail_conn requests
+ * that the connection be failed afterwards.
+ */
+int iscsit_add_reject(
+       u8 reason,
+       int fail_conn,
+       unsigned char *buf,
+       struct iscsi_conn *conn)
+{
+       struct iscsi_cmd *cmd;
+       struct iscsi_reject *hdr;
+       int ret;
+
+       cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+       if (!cmd)
+               return -1;
+
+       cmd->iscsi_opcode = ISCSI_OP_REJECT;
+       if (fail_conn)
+               cmd->cmd_flags |= ICF_REJECT_FAIL_CONN;
+
+       hdr     = (struct iscsi_reject *) cmd->pdu;
+       hdr->reason = reason;
+
+       /* The reject's data segment carries a copy of the rejected header. */
+       cmd->buf_ptr = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL);
+       if (!cmd->buf_ptr) {
+               pr_err("Unable to allocate memory for cmd->buf_ptr\n");
+               iscsit_release_cmd(cmd);
+               return -1;
+       }
+       memcpy(cmd->buf_ptr, buf, ISCSI_HDR_LEN);
+
+       spin_lock_bh(&conn->cmd_lock);
+       list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+       spin_unlock_bh(&conn->cmd_lock);
+
+       cmd->i_state = ISTATE_SEND_REJECT;
+       iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+
+       /* TX thread completes reject_comp once the PDU has gone out. */
+       ret = wait_for_completion_interruptible(&cmd->reject_comp);
+       if (ret != 0)
+               return -1;
+
+       return (!fail_conn) ? 0 : -1;
+}
+
+/*
+ * Turn an already-allocated @cmd into a Reject PDU (echoing the offending
+ * PDU header from @buf), queue it on its connection's response queue, and
+ * wait for it to be sent.  @add_to_conn controls whether @cmd still needs
+ * to be linked onto conn->conn_cmd_list (it may already be).
+ *
+ * Returns 0 when the reject was delivered and @fail_conn is 0; -1 when
+ * cmd->conn is unset, on allocation failure, interrupted wait, or when
+ * @fail_conn requests the connection be failed afterwards.
+ */
+int iscsit_add_reject_from_cmd(
+       u8 reason,
+       int fail_conn,
+       int add_to_conn,
+       unsigned char *buf,
+       struct iscsi_cmd *cmd)
+{
+       struct iscsi_conn *conn;
+       struct iscsi_reject *hdr;
+       int ret;
+
+       if (!cmd->conn) {
+               pr_err("cmd->conn is NULL for ITT: 0x%08x\n",
+                               cmd->init_task_tag);
+               return -1;
+       }
+       conn = cmd->conn;
+
+       cmd->iscsi_opcode = ISCSI_OP_REJECT;
+       if (fail_conn)
+               cmd->cmd_flags |= ICF_REJECT_FAIL_CONN;
+
+       hdr     = (struct iscsi_reject *) cmd->pdu;
+       hdr->reason = reason;
+
+       /* The reject's data segment carries a copy of the rejected header. */
+       cmd->buf_ptr = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL);
+       if (!cmd->buf_ptr) {
+               pr_err("Unable to allocate memory for cmd->buf_ptr\n");
+               iscsit_release_cmd(cmd);
+               return -1;
+       }
+       memcpy(cmd->buf_ptr, buf, ISCSI_HDR_LEN);
+
+       if (add_to_conn) {
+               spin_lock_bh(&conn->cmd_lock);
+               list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+               spin_unlock_bh(&conn->cmd_lock);
+       }
+
+       cmd->i_state = ISTATE_SEND_REJECT;
+       iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+
+       /* TX thread completes reject_comp once the PDU has gone out. */
+       ret = wait_for_completion_interruptible(&cmd->reject_comp);
+       if (ret != 0)
+               return -1;
+
+       return (!fail_conn) ? 0 : -1;
+}
+
+/*
+ * Map some portion of the allocated scatterlist to an iovec, suitable for
+ * kernel sockets to copy data in/out. This handles both pages and slab-allocated
+ * buffers, since we have been tricky and mapped t_mem_sg to the buffer in
+ * either case (see iscsit_alloc_buffs)
+ *
+ * Returns the number of kvec entries filled.  Every mapped entry is
+ * kmap()ed; the count and starting sg are recorded in cmd->kmapped_nents /
+ * cmd->first_data_sg so iscsit_unmap_iovec() can pair the kunmap()s.
+ * The caller must supply an @iov array large enough for the requested
+ * span (see iscsit_allocate_iovecs()).
+ */
+static int iscsit_map_iovec(
+       struct iscsi_cmd *cmd,
+       struct kvec *iov,
+       u32 data_offset,
+       u32 data_length)
+{
+       u32 i = 0;
+       struct scatterlist *sg;
+       unsigned int page_off;
+
+       /*
+        * We have a private mapping of the allocated pages in t_mem_sg.
+        * At this point, we also know each contains a page.
+        */
+       sg = &cmd->t_mem_sg[data_offset / PAGE_SIZE];
+       page_off = (data_offset % PAGE_SIZE);
+
+       cmd->first_data_sg = sg;
+       cmd->first_data_sg_off = page_off;
+
+       while (data_length) {
+               /* Only the first entry starts at a non-zero page offset. */
+               u32 cur_len = min_t(u32, data_length, sg->length - page_off);
+
+               iov[i].iov_base = kmap(sg_page(sg)) + sg->offset + page_off;
+               iov[i].iov_len = cur_len;
+
+               data_length -= cur_len;
+               page_off = 0;
+               sg = sg_next(sg);
+               i++;
+       }
+
+       cmd->kmapped_nents = i;
+
+       return i;
+}
+
+/*
+ * Undo the kmap()s taken by iscsit_map_iovec(), using the bookkeeping it
+ * left in cmd->first_data_sg and cmd->kmapped_nents.
+ */
+static void iscsit_unmap_iovec(struct iscsi_cmd *cmd)
+{
+       struct scatterlist *sg = cmd->first_data_sg;
+       u32 n;
+
+       for (n = 0; n < cmd->kmapped_nents; n++)
+               kunmap(sg_page(sg + n));
+}
+
+/*
+ * Handle an advancing ExpStatSN from the initiator: every command on this
+ * connection whose status has already been sent (ISTATE_SENT_STATUS) with
+ * a StatSN below @exp_statsn is now acknowledged, and is queued on the
+ * immediate queue for removal.
+ *
+ * Locking: cmd->istate_lock nests inside conn->cmd_lock here.
+ *
+ * NOTE(review): the plain '<' comparison on stat_sn does not account for
+ * 32-bit serial-number wraparound (RFC 1982 style arithmetic) -- confirm
+ * whether wraparound is handled elsewhere.
+ */
+static void iscsit_ack_from_expstatsn(struct iscsi_conn *conn, u32 exp_statsn)
+{
+       struct iscsi_cmd *cmd;
+
+       conn->exp_statsn = exp_statsn;
+
+       spin_lock_bh(&conn->cmd_lock);
+       list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) {
+               spin_lock(&cmd->istate_lock);
+               if ((cmd->i_state == ISTATE_SENT_STATUS) &&
+                   (cmd->stat_sn < exp_statsn)) {
+                       cmd->i_state = ISTATE_REMOVE;
+                       spin_unlock(&cmd->istate_lock);
+                       iscsit_add_cmd_to_immediate_queue(cmd, conn,
+                                               cmd->i_state);
+                       continue;
+               }
+               spin_unlock(&cmd->istate_lock);
+       }
+       spin_unlock_bh(&conn->cmd_lock);
+}
+
+/*
+ * Allocate the kvec array used for socket I/O of this command's payload:
+ * one entry per data scatterlist entry (at least one, even with no
+ * payload), plus TRANSPORT_IOV_DATA_BUFFER extra entries for header,
+ * padding and digest segments.  Returns 0 or -ENOMEM.
+ */
+static int iscsit_allocate_iovecs(struct iscsi_cmd *cmd)
+{
+       u32 iov_count = (cmd->se_cmd.t_data_nents == 0) ? 1 :
+                               cmd->se_cmd.t_data_nents;
+
+       iov_count += TRANSPORT_IOV_DATA_BUFFER;
+
+       /* kcalloc() zeroes and checks the count * size math for overflow. */
+       cmd->iov_data = kcalloc(iov_count, sizeof(struct kvec), GFP_KERNEL);
+       if (!cmd->iov_data) {
+               pr_err("Unable to allocate cmd->iov_data\n");
+               return -ENOMEM;
+       }
+
+       cmd->orig_iov_data_count = iov_count;
+       return 0;
+}
+
+/*
+ * Allocate zeroed backing pages for the command's SCSI payload, publish
+ * them to the target core via transport_generic_map_mem_to_cmd(), and
+ * allocate the kvec array used for socket I/O.
+ *
+ * Returns 0 on success or -ENOMEM; on failure, every page allocated so
+ * far and the scatterlist itself are released.
+ */
+static int iscsit_alloc_buffs(struct iscsi_cmd *cmd)
+{
+       struct scatterlist *sgl;
+       u32 length = cmd->se_cmd.data_length;
+       int nents = DIV_ROUND_UP(length, PAGE_SIZE);
+       int i = 0, ret;
+       /*
+        * If no SCSI payload is present, allocate the default iovecs used for
+        * iSCSI PDU Header
+        */
+       if (!length)
+               return iscsit_allocate_iovecs(cmd);
+
+       /* kcalloc() checks nents * sizeof(*sgl) for multiplication overflow. */
+       sgl = kcalloc(nents, sizeof(*sgl), GFP_KERNEL);
+       if (!sgl)
+               return -ENOMEM;
+
+       sg_init_table(sgl, nents);
+
+       while (length) {
+               int buf_size = min_t(int, length, PAGE_SIZE);
+               struct page *page;
+
+               page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+               if (!page)
+                       goto page_alloc_failed;
+
+               sg_set_page(&sgl[i], page, buf_size, 0);
+
+               length -= buf_size;
+               i++;
+       }
+
+       cmd->t_mem_sg = sgl;
+       cmd->t_mem_sg_nents = nents;
+
+       /* BIDI ops not supported */
+
+       /* Tell the core about our preallocated memory */
+       transport_generic_map_mem_to_cmd(&cmd->se_cmd, sgl, nents, NULL, 0);
+       /*
+        * Allocate iovecs for SCSI payload after transport_generic_map_mem_to_cmd
+        * so that cmd->se_cmd.t_tasks_se_num has been set.
+        */
+       ret = iscsit_allocate_iovecs(cmd);
+       if (ret < 0)
+               goto page_alloc_failed;
+
+       return 0;
+
+page_alloc_failed:
+       /*
+        * Only entries [0, i) hold pages: on alloc_page() failure sgl[i] was
+        * never set, and after the iovec failure i == nents, so the previous
+        * "while (i >= 0)" loop freed an unset or out-of-range entry.  Also
+        * free sgl directly -- cmd->t_mem_sg is not yet assigned on the
+        * alloc_page() failure path, which previously leaked the table.
+        */
+       while (i > 0)
+               __free_page(sg_page(&sgl[--i]));
+       kfree(sgl);
+       cmd->t_mem_sg = NULL;
+       return -ENOMEM;
+}
+
+/*
+ * Process a received iSCSI SCSI Command PDU: validate the header's flag
+ * and length combinations, allocate and initialize the corresponding
+ * iscsi_cmd, map the LUN and hand the CDB to the target core, allocate
+ * payload buffers, sequence the CmdSN, and (when present) receive the
+ * Immediate Data segment.
+ *
+ * Returns 0 on success, -1 on connection-fatal errors, or the result of
+ * iscsit_add_reject*() when the PDU is rejected.
+ */
+static int iscsit_handle_scsi_cmd(
+       struct iscsi_conn *conn,
+       unsigned char *buf)
+{
+       int     data_direction, cmdsn_ret = 0, immed_ret, ret, transport_ret;
+       int     dump_immediate_data = 0, send_check_condition = 0, payload_length;
+       struct iscsi_cmd        *cmd = NULL;
+       struct iscsi_scsi_req *hdr;
+
+       /* Per-session and per-node command accounting. */
+       spin_lock_bh(&conn->sess->session_stats_lock);
+       conn->sess->cmd_pdus++;
+       if (conn->sess->se_sess->se_node_acl) {
+               spin_lock(&conn->sess->se_sess->se_node_acl->stats_lock);
+               conn->sess->se_sess->se_node_acl->num_cmds++;
+               spin_unlock(&conn->sess->se_sess->se_node_acl->stats_lock);
+       }
+       spin_unlock_bh(&conn->sess->session_stats_lock);
+
+       /* Byte-swap the wire header fields in place (host order from here). */
+       hdr                     = (struct iscsi_scsi_req *) buf;
+       payload_length          = ntoh24(hdr->dlength);
+       hdr->itt                = be32_to_cpu(hdr->itt);
+       hdr->data_length        = be32_to_cpu(hdr->data_length);
+       hdr->cmdsn              = be32_to_cpu(hdr->cmdsn);
+       hdr->exp_statsn         = be32_to_cpu(hdr->exp_statsn);
+
+       /* FIXME; Add checks for AdditionalHeaderSegment */
+
+       if (!(hdr->flags & ISCSI_FLAG_CMD_WRITE) &&
+           !(hdr->flags & ISCSI_FLAG_CMD_FINAL)) {
+               pr_err("ISCSI_FLAG_CMD_WRITE & ISCSI_FLAG_CMD_FINAL"
+                               " not set. Bad iSCSI Initiator.\n");
+               return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1,
+                               buf, conn);
+       }
+
+       if (((hdr->flags & ISCSI_FLAG_CMD_READ) ||
+            (hdr->flags & ISCSI_FLAG_CMD_WRITE)) && !hdr->data_length) {
+               /*
+                * Vmware ESX v3.0 uses a modified Cisco Initiator (v3.4.2)
+                * that adds support for RESERVE/RELEASE.  There is a bug
+                * add with this new functionality that sets R/W bits when
+                * neither CDB carries any READ or WRITE datapayloads.
+                */
+               if ((hdr->cdb[0] == 0x16) || (hdr->cdb[0] == 0x17)) {
+                       hdr->flags &= ~ISCSI_FLAG_CMD_READ;
+                       hdr->flags &= ~ISCSI_FLAG_CMD_WRITE;
+                       goto done;
+               }
+
+               pr_err("ISCSI_FLAG_CMD_READ or ISCSI_FLAG_CMD_WRITE"
+                       " set when Expected Data Transfer Length is 0 for"
+                       " CDB: 0x%02x. Bad iSCSI Initiator.\n", hdr->cdb[0]);
+               return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1,
+                               buf, conn);
+       }
+done:
+
+       if (!(hdr->flags & ISCSI_FLAG_CMD_READ) &&
+           !(hdr->flags & ISCSI_FLAG_CMD_WRITE) && (hdr->data_length != 0)) {
+               pr_err("ISCSI_FLAG_CMD_READ and/or ISCSI_FLAG_CMD_WRITE"
+                       " MUST be set if Expected Data Transfer Length is not 0."
+                       " Bad iSCSI Initiator\n");
+               return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1,
+                               buf, conn);
+       }
+
+       if ((hdr->flags & ISCSI_FLAG_CMD_READ) &&
+           (hdr->flags & ISCSI_FLAG_CMD_WRITE)) {
+               pr_err("Bidirectional operations not supported!\n");
+               return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1,
+                               buf, conn);
+       }
+
+       if (hdr->opcode & ISCSI_OP_IMMEDIATE) {
+               pr_err("Illegally set Immediate Bit in iSCSI Initiator"
+                               " Scsi Command PDU.\n");
+               return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1,
+                               buf, conn);
+       }
+
+       if (payload_length && !conn->sess->sess_ops->ImmediateData) {
+               pr_err("ImmediateData=No but DataSegmentLength=%u,"
+                       " protocol error.\n", payload_length);
+               return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+                               buf, conn);
+       }
+
+       if ((hdr->data_length == payload_length) &&
+           (!(hdr->flags & ISCSI_FLAG_CMD_FINAL))) {
+               pr_err("Expected Data Transfer Length and Length of"
+                       " Immediate Data are the same, but ISCSI_FLAG_CMD_FINAL"
+                       " bit is not set protocol error\n");
+               return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+                               buf, conn);
+       }
+
+       if (payload_length > hdr->data_length) {
+               pr_err("DataSegmentLength: %u is greater than"
+                       " EDTL: %u, protocol error.\n", payload_length,
+                               hdr->data_length);
+               return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+                               buf, conn);
+       }
+
+       if (payload_length > conn->conn_ops->MaxRecvDataSegmentLength) {
+               pr_err("DataSegmentLength: %u is greater than"
+                       " MaxRecvDataSegmentLength: %u, protocol error.\n",
+                       payload_length, conn->conn_ops->MaxRecvDataSegmentLength);
+               return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+                               buf, conn);
+       }
+
+       if (payload_length > conn->sess->sess_ops->FirstBurstLength) {
+               pr_err("DataSegmentLength: %u is greater than"
+                       " FirstBurstLength: %u, protocol error.\n",
+                       payload_length, conn->sess->sess_ops->FirstBurstLength);
+               return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1,
+                                       buf, conn);
+       }
+
+       data_direction = (hdr->flags & ISCSI_FLAG_CMD_WRITE) ? DMA_TO_DEVICE :
+                        (hdr->flags & ISCSI_FLAG_CMD_READ) ? DMA_FROM_DEVICE :
+                         DMA_NONE;
+
+       cmd = iscsit_allocate_se_cmd(conn, hdr->data_length, data_direction,
+                               (hdr->flags & ISCSI_FLAG_CMD_ATTR_MASK));
+       if (!cmd)
+               return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, 1,
+                                       buf, conn);
+
+       pr_debug("Got SCSI Command, ITT: 0x%08x, CmdSN: 0x%08x,"
+               " ExpXferLen: %u, Length: %u, CID: %hu\n", hdr->itt,
+               hdr->cmdsn, hdr->data_length, payload_length, conn->cid);
+
+       cmd->iscsi_opcode       = ISCSI_OP_SCSI_CMD;
+       cmd->i_state            = ISTATE_NEW_CMD;
+       cmd->immediate_cmd      = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 1 : 0);
+       cmd->immediate_data     = (payload_length) ? 1 : 0;
+       cmd->unsolicited_data   = ((!(hdr->flags & ISCSI_FLAG_CMD_FINAL) &&
+                                    (hdr->flags & ISCSI_FLAG_CMD_WRITE)) ? 1 : 0);
+       if (cmd->unsolicited_data)
+               cmd->cmd_flags |= ICF_NON_IMMEDIATE_UNSOLICITED_DATA;
+
+       conn->sess->init_task_tag = cmd->init_task_tag = hdr->itt;
+       if (hdr->flags & ISCSI_FLAG_CMD_READ) {
+               /* 0xFFFFFFFF is the reserved TTT value, so skip over it. */
+               spin_lock_bh(&conn->sess->ttt_lock);
+               cmd->targ_xfer_tag = conn->sess->targ_xfer_tag++;
+               if (cmd->targ_xfer_tag == 0xFFFFFFFF)
+                       cmd->targ_xfer_tag = conn->sess->targ_xfer_tag++;
+               spin_unlock_bh(&conn->sess->ttt_lock);
+       } else if (hdr->flags & ISCSI_FLAG_CMD_WRITE)
+               cmd->targ_xfer_tag = 0xFFFFFFFF;
+       cmd->cmd_sn             = hdr->cmdsn;
+       cmd->exp_stat_sn        = hdr->exp_statsn;
+       cmd->first_burst_len    = payload_length;
+
+       if (cmd->data_direction == DMA_FROM_DEVICE) {
+               struct iscsi_datain_req *dr;
+
+               dr = iscsit_allocate_datain_req();
+               if (!dr)
+                       return iscsit_add_reject_from_cmd(
+                                       ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+                                       1, 1, buf, cmd);
+
+               iscsit_attach_datain_req(cmd, dr);
+       }
+
+       /*
+        * The CDB is going to an se_device_t.
+        */
+       ret = iscsit_get_lun_for_cmd(cmd, hdr->cdb,
+                               get_unaligned_le64(&hdr->lun));
+       if (ret < 0) {
+               if (cmd->se_cmd.scsi_sense_reason == TCM_NON_EXISTENT_LUN) {
+                       pr_debug("Responding to non-acl'ed,"
+                               " non-existent or non-exported iSCSI LUN:"
+                               " 0x%016Lx\n", get_unaligned_le64(&hdr->lun));
+               }
+               if (ret == PYX_TRANSPORT_OUT_OF_MEMORY_RESOURCES)
+                       return iscsit_add_reject_from_cmd(
+                                       ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+                                       1, 1, buf, cmd);
+
+               /* LUN errors become a CHECK_CONDITION later in CmdSN order. */
+               send_check_condition = 1;
+               goto attach_cmd;
+       }
+       /*
+        * The Initiator Node has access to the LUN (the addressing method
+        * is handled inside of iscsit_get_lun_for_cmd()).  Now it's time to
+        * allocate 1->N transport tasks (depending on sector count and
+        * maximum request size the physical HBA(s) can handle.
+        */
+       transport_ret = transport_generic_allocate_tasks(&cmd->se_cmd, hdr->cdb);
+       if (transport_ret == -ENOMEM) {
+               return iscsit_add_reject_from_cmd(
+                               ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+                               1, 1, buf, cmd);
+       } else if (transport_ret == -EINVAL) {
+               /*
+                * Unsupported SAM Opcode.  CHECK_CONDITION will be sent
+                * in iscsit_execute_cmd() during the CmdSN OOO Execution
+                * Mechinism.
+                */
+               send_check_condition = 1;
+       } else {
+               if (iscsit_decide_list_to_build(cmd, payload_length) < 0)
+                       return iscsit_add_reject_from_cmd(
+                               ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+                               1, 1, buf, cmd);
+       }
+
+attach_cmd:
+       spin_lock_bh(&conn->cmd_lock);
+       list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+       spin_unlock_bh(&conn->cmd_lock);
+       /*
+        * Check if we need to delay processing because of ALUA
+        * Active/NonOptimized primary access state..
+        */
+       core_alua_check_nonop_delay(&cmd->se_cmd);
+       /*
+        * Allocate and setup SGL used with transport_generic_map_mem_to_cmd().
+        * also call iscsit_allocate_iovecs()
+        */
+       ret = iscsit_alloc_buffs(cmd);
+       if (ret < 0)
+               return iscsit_add_reject_from_cmd(
+                               ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+                               1, 1, buf, cmd);
+       /*
+        * Check the CmdSN against ExpCmdSN/MaxCmdSN here if
+        * the Immediate Bit is not set, and no Immediate
+        * Data is attached.
+        *
+        * A PDU/CmdSN carrying Immediate Data can only
+        * be processed after the DataCRC has passed.
+        * If the DataCRC fails, the CmdSN MUST NOT
+        * be acknowledged. (See below)
+        */
+       if (!cmd->immediate_data) {
+               cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn);
+               if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER)
+                       return iscsit_add_reject_from_cmd(
+                               ISCSI_REASON_PROTOCOL_ERROR,
+                               1, 0, buf, cmd);
+       }
+
+       iscsit_ack_from_expstatsn(conn, hdr->exp_statsn);
+
+       /*
+        * If no Immediate Data is attached, it's OK to return now.
+        */
+       if (!cmd->immediate_data) {
+               if (send_check_condition)
+                       return 0;
+
+               if (cmd->unsolicited_data) {
+                       iscsit_set_dataout_sequence_values(cmd);
+
+                       spin_lock_bh(&cmd->dataout_timeout_lock);
+                       iscsit_start_dataout_timer(cmd, cmd->conn);
+                       spin_unlock_bh(&cmd->dataout_timeout_lock);
+               }
+
+               return 0;
+       }
+
+       /*
+        * Early CHECK_CONDITIONs never make it to the transport processing
+        * thread.  They are processed in CmdSN order by
+        * iscsit_check_received_cmdsn() below.
+        */
+       if (send_check_condition) {
+               immed_ret = IMMEDIATE_DATA_NORMAL_OPERATION;
+               dump_immediate_data = 1;
+               goto after_immediate_data;
+       }
+       /*
+        * Call directly into transport_generic_new_cmd() to perform
+        * the backend memory allocation.
+        */
+       ret = transport_generic_new_cmd(&cmd->se_cmd);
+       if ((ret < 0) || (cmd->se_cmd.se_cmd_flags & SCF_SE_CMD_FAILED)) {
+               immed_ret = IMMEDIATE_DATA_NORMAL_OPERATION;
+               dump_immediate_data = 1;
+               goto after_immediate_data;
+       }
+
+       immed_ret = iscsit_handle_immediate_data(cmd, buf, payload_length);
+after_immediate_data:
+       if (immed_ret == IMMEDIATE_DATA_NORMAL_OPERATION) {
+               /*
+                * A PDU/CmdSN carrying Immediate Data passed
+                * DataCRC, check against ExpCmdSN/MaxCmdSN if
+                * Immediate Bit is not set.
+                */
+               cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn);
+               /*
+                * Special case for Unsupported SAM WRITE Opcodes
+                * and ImmediateData=Yes.
+                */
+               if (dump_immediate_data) {
+                       if (iscsit_dump_data_payload(conn, payload_length, 1) < 0)
+                               return -1;
+               } else if (cmd->unsolicited_data) {
+                       iscsit_set_dataout_sequence_values(cmd);
+
+                       spin_lock_bh(&cmd->dataout_timeout_lock);
+                       iscsit_start_dataout_timer(cmd, cmd->conn);
+                       spin_unlock_bh(&cmd->dataout_timeout_lock);
+               }
+
+               if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER)
+                       return iscsit_add_reject_from_cmd(
+                               ISCSI_REASON_PROTOCOL_ERROR,
+                               1, 0, buf, cmd);
+
+       } else if (immed_ret == IMMEDIATE_DATA_ERL1_CRC_FAILURE) {
+               /*
+                * Immediate Data failed DataCRC and ERL>=1,
+                * silently drop this PDU and let the initiator
+                * plug the CmdSN gap.
+                *
+                * FIXME: Send Unsolicited NOPIN with reserved
+                * TTT here to help the initiator figure out
+                * the missing CmdSN, although they should be
+                * intelligent enough to determine the missing
+                * CmdSN and issue a retry to plug the sequence.
+                */
+               cmd->i_state = ISTATE_REMOVE;
+               iscsit_add_cmd_to_immediate_queue(cmd, conn, cmd->i_state);
+       } else /* immed_ret == IMMEDIATE_DATA_CANNOT_RECOVER */
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Compute a data digest over @data_length bytes of the command's payload
+ * scatterlist (starting from cmd->first_data_sg / first_data_sg_off, as
+ * recorded by iscsit_map_iovec()), folding in @padding pad bytes at the
+ * end, and return the resulting 32-bit digest value.
+ */
+static u32 iscsit_do_crypto_hash_sg(
+       struct hash_desc *hash,
+       struct iscsi_cmd *cmd,
+       u32 data_offset,
+       u32 data_length,
+       u32 padding,
+       u8 *pad_bytes)
+{
+       u32 data_crc;
+       u32 i;
+       struct scatterlist *sg;
+       unsigned int page_off;
+
+       crypto_hash_init(hash);
+
+       sg = cmd->first_data_sg;
+       page_off = cmd->first_data_sg_off;
+
+       i = 0;
+       while (data_length) {
+               /*
+                * NOTE(review): cur_len deducts page_off from the first
+                * entry's length, but crypto_hash_update() is handed &sg[i]
+                * and hashes from the start of that entry, not page_off
+                * bytes in.  When first_data_sg_off is non-zero the digest
+                * appears to cover the wrong byte range -- confirm against
+                * the crypto_hash_update() contract.
+                */
+               u32 cur_len = min_t(u32, data_length, (sg[i].length - page_off));
+
+               crypto_hash_update(hash, &sg[i], cur_len);
+
+               data_length -= cur_len;
+               page_off = 0;
+               i++;
+       }
+
+       if (padding) {
+               struct scatterlist pad_sg;
+
+               sg_init_one(&pad_sg, pad_bytes, padding);
+               crypto_hash_update(hash, &pad_sg, padding);
+       }
+       crypto_hash_final(hash, (u8 *) &data_crc);
+
+       return data_crc;
+}
+
+/*
+ * Compute a data digest over a linear buffer of @payload_length bytes,
+ * optionally followed by @padding pad bytes, writing the result through
+ * @data_crc.
+ */
+static void iscsit_do_crypto_hash_buf(
+       struct hash_desc *hash,
+       unsigned char *buf,
+       u32 payload_length,
+       u32 padding,
+       u8 *pad_bytes,
+       u8 *data_crc)
+{
+       struct scatterlist tmp_sg;
+
+       crypto_hash_init(hash);
+
+       /* Hash the payload, then the pad bytes, reusing one sg entry. */
+       sg_init_one(&tmp_sg, (u8 *)buf, payload_length);
+       crypto_hash_update(hash, &tmp_sg, payload_length);
+
+       if (padding) {
+               sg_init_one(&tmp_sg, pad_bytes, padding);
+               crypto_hash_update(hash, &tmp_sg, padding);
+       }
+
+       crypto_hash_final(hash, data_crc);
+}
+
+/*
+ * Process a received Data-Out PDU (RFC 3720 section 10.7): locate the
+ * outstanding WRITE command it belongs to, receive the payload (plus any
+ * pad bytes and optional DataDigest) into the command's iovecs, and then
+ * drive R2T generation or hand-off to the SE transport according to the
+ * data-out sequencing state.
+ *
+ * Returns 0 on success or for recoverable/dumped PDUs, -1 on fatal
+ * errors that should bring down the connection.
+ */
+static int iscsit_handle_data_out(struct iscsi_conn *conn, unsigned char *buf)
+{
+       int iov_ret, ooo_cmdsn = 0, ret;
+       u8 data_crc_failed = 0;
+       u32 checksum, iov_count = 0, padding = 0, rx_got = 0;
+       u32 rx_size = 0, payload_length;
+       struct iscsi_cmd *cmd = NULL;
+       struct se_cmd *se_cmd;
+       struct iscsi_data *hdr;
+       struct kvec *iov;
+       unsigned long flags;
+
+       /* Convert big-endian wire fields to host byte order in place. */
+       hdr                     = (struct iscsi_data *) buf;
+       payload_length          = ntoh24(hdr->dlength);
+       hdr->itt                = be32_to_cpu(hdr->itt);
+       hdr->ttt                = be32_to_cpu(hdr->ttt);
+       hdr->exp_statsn         = be32_to_cpu(hdr->exp_statsn);
+       hdr->datasn             = be32_to_cpu(hdr->datasn);
+       hdr->offset             = be32_to_cpu(hdr->offset);
+
+       if (!payload_length) {
+               pr_err("DataOUT payload is ZERO, protocol error.\n");
+               return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+                                       buf, conn);
+       }
+
+       /* iSCSI write */
+       /* Account the received payload in session and node-ACL statistics. */
+       spin_lock_bh(&conn->sess->session_stats_lock);
+       conn->sess->rx_data_octets += payload_length;
+       if (conn->sess->se_sess->se_node_acl) {
+               spin_lock(&conn->sess->se_sess->se_node_acl->stats_lock);
+               conn->sess->se_sess->se_node_acl->write_bytes += payload_length;
+               spin_unlock(&conn->sess->se_sess->se_node_acl->stats_lock);
+       }
+       spin_unlock_bh(&conn->sess->session_stats_lock);
+
+       if (payload_length > conn->conn_ops->MaxRecvDataSegmentLength) {
+               pr_err("DataSegmentLength: %u is greater than"
+                       " MaxRecvDataSegmentLength: %u\n", payload_length,
+                       conn->conn_ops->MaxRecvDataSegmentLength);
+               return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+                                       buf, conn);
+       }
+
+       /* An unknown ITT causes the payload to be read and discarded. */
+       cmd = iscsit_find_cmd_from_itt_or_dump(conn, hdr->itt,
+                       payload_length);
+       if (!cmd)
+               return 0;
+
+       pr_debug("Got DataOut ITT: 0x%08x, TTT: 0x%08x,"
+               " DataSN: 0x%08x, Offset: %u, Length: %u, CID: %hu\n",
+               hdr->itt, hdr->ttt, hdr->datasn, hdr->offset,
+               payload_length, conn->cid);
+
+       if (cmd->cmd_flags & ICF_GOT_LAST_DATAOUT) {
+               pr_err("Command ITT: 0x%08x received DataOUT after"
+                       " last DataOUT received, dumping payload\n",
+                       cmd->init_task_tag);
+               return iscsit_dump_data_payload(conn, payload_length, 1);
+       }
+
+       if (cmd->data_direction != DMA_TO_DEVICE) {
+               pr_err("Command ITT: 0x%08x received DataOUT for a"
+                       " NON-WRITE command.\n", cmd->init_task_tag);
+               return iscsit_add_reject_from_cmd(ISCSI_REASON_PROTOCOL_ERROR,
+                               1, 0, buf, cmd);
+       }
+       se_cmd = &cmd->se_cmd;
+       iscsit_mod_dataout_timer(cmd);
+
+       /* The payload must lie within the command's expected transfer length. */
+       if ((hdr->offset + payload_length) > cmd->data_length) {
+               pr_err("DataOut Offset: %u, Length %u greater than"
+                       " iSCSI Command EDTL %u, protocol error.\n",
+                       hdr->offset, payload_length, cmd->data_length);
+               return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID,
+                               1, 0, buf, cmd);
+       }
+
+       if (cmd->unsolicited_data) {
+               int dump_unsolicited_data = 0;
+
+               if (conn->sess->sess_ops->InitialR2T) {
+                       pr_err("Received unexpected unsolicited data"
+                               " while InitialR2T=Yes, protocol error.\n");
+                       transport_send_check_condition_and_sense(&cmd->se_cmd,
+                                       TCM_UNEXPECTED_UNSOLICITED_DATA, 0);
+                       return -1;
+               }
+               /*
+                * Special case for dealing with Unsolicited DataOUT
+                * and Unsupported SAM WRITE Opcodes and SE resource allocation
+                * failures;
+                */
+
+               /* Something's amiss if we're not in WRITE_PENDING state... */
+               spin_lock_irqsave(&se_cmd->t_state_lock, flags);
+               WARN_ON(se_cmd->t_state != TRANSPORT_WRITE_PENDING);
+               spin_unlock_irqrestore(&se_cmd->t_state_lock, flags);
+
+               spin_lock_irqsave(&se_cmd->t_state_lock, flags);
+               if (!(se_cmd->se_cmd_flags & SCF_SUPPORTED_SAM_OPCODE) ||
+                    (se_cmd->se_cmd_flags & SCF_SE_CMD_FAILED))
+                       dump_unsolicited_data = 1;
+               spin_unlock_irqrestore(&se_cmd->t_state_lock, flags);
+
+               if (dump_unsolicited_data) {
+                       /*
+                        * Check if a delayed TASK_ABORTED status needs to
+                        * be sent now if the ISCSI_FLAG_CMD_FINAL has been
+                        * received with the unsolicitied data out.
+                        */
+                       if (hdr->flags & ISCSI_FLAG_CMD_FINAL)
+                               iscsit_stop_dataout_timer(cmd);
+
+                       transport_check_aborted_status(se_cmd,
+                                       (hdr->flags & ISCSI_FLAG_CMD_FINAL));
+                       return iscsit_dump_data_payload(conn, payload_length, 1);
+               }
+       } else {
+               /*
+                * For the normal solicited data path:
+                *
+                * Check for a delayed TASK_ABORTED status and dump any
+                * incoming data out payload if one exists.  Also, when the
+                * ISCSI_FLAG_CMD_FINAL is set to denote the end of the current
+                * data out sequence, we decrement outstanding_r2ts.  Once
+                * outstanding_r2ts reaches zero, go ahead and send the delayed
+                * TASK_ABORTED status.
+                */
+               if (atomic_read(&se_cmd->t_transport_aborted) != 0) {
+                       if (hdr->flags & ISCSI_FLAG_CMD_FINAL)
+                               if (--cmd->outstanding_r2ts < 1) {
+                                       iscsit_stop_dataout_timer(cmd);
+                                       transport_check_aborted_status(
+                                                       se_cmd, 1);
+                               }
+
+                       return iscsit_dump_data_payload(conn, payload_length, 1);
+               }
+       }
+       /*
+        * Preform DataSN, DataSequenceInOrder, DataPDUInOrder, and
+        * within-command recovery checks before receiving the payload.
+        */
+       ret = iscsit_check_pre_dataout(cmd, buf);
+       if (ret == DATAOUT_WITHIN_COMMAND_RECOVERY)
+               return 0;
+       else if (ret == DATAOUT_CANNOT_RECOVER)
+               return -1;
+
+       /* Map the command's data buffer at hdr->offset into iovecs for rx. */
+       rx_size += payload_length;
+       iov = &cmd->iov_data[0];
+
+       iov_ret = iscsit_map_iovec(cmd, iov, hdr->offset, payload_length);
+       if (iov_ret < 0)
+               return -1;
+
+       iov_count += iov_ret;
+
+       /* Wire payloads are padded out to a 4-byte boundary. */
+       padding = ((-payload_length) & 3);
+       if (padding != 0) {
+               iov[iov_count].iov_base = cmd->pad_bytes;
+               iov[iov_count++].iov_len = padding;
+               rx_size += padding;
+               pr_debug("Receiving %u padding bytes.\n", padding);
+       }
+
+       /* Reserve a trailing iovec slot for the CRC32C DataDigest, if any. */
+       if (conn->conn_ops->DataDigest) {
+               iov[iov_count].iov_base = &checksum;
+               iov[iov_count++].iov_len = ISCSI_CRC_LEN;
+               rx_size += ISCSI_CRC_LEN;
+       }
+
+       rx_got = rx_data(conn, &cmd->iov_data[0], iov_count, rx_size);
+
+       iscsit_unmap_iovec(cmd);
+
+       if (rx_got != rx_size)
+               return -1;
+
+       /* Recompute the digest locally and compare against the wire value. */
+       if (conn->conn_ops->DataDigest) {
+               u32 data_crc;
+
+               data_crc = iscsit_do_crypto_hash_sg(&conn->conn_rx_hash, cmd,
+                                                   hdr->offset, payload_length, padding,
+                                                   cmd->pad_bytes);
+
+               if (checksum != data_crc) {
+                       pr_err("ITT: 0x%08x, Offset: %u, Length: %u,"
+                               " DataSN: 0x%08x, CRC32C DataDigest 0x%08x"
+                               " does not match computed 0x%08x\n",
+                               hdr->itt, hdr->offset, payload_length,
+                               hdr->datasn, checksum, data_crc);
+                       data_crc_failed = 1;
+               } else {
+                       pr_debug("Got CRC32C DataDigest 0x%08x for"
+                               " %u bytes of Data Out\n", checksum,
+                               payload_length);
+               }
+       }
+       /*
+        * Increment post receive data and CRC values or perform
+        * within-command recovery.
+        */
+       ret = iscsit_check_post_dataout(cmd, buf, data_crc_failed);
+       if ((ret == DATAOUT_NORMAL) || (ret == DATAOUT_WITHIN_COMMAND_RECOVERY))
+               return 0;
+       else if (ret == DATAOUT_SEND_R2T) {
+               iscsit_set_dataout_sequence_values(cmd);
+               iscsit_build_r2ts_for_cmd(cmd, conn, 0);
+       } else if (ret == DATAOUT_SEND_TO_TRANSPORT) {
+               /*
+                * Handle extra special case for out of order
+                * Unsolicited Data Out.
+                */
+               spin_lock_bh(&cmd->istate_lock);
+               ooo_cmdsn = (cmd->cmd_flags & ICF_OOO_CMDSN);
+               cmd->cmd_flags |= ICF_GOT_LAST_DATAOUT;
+               cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT;
+               spin_unlock_bh(&cmd->istate_lock);
+
+               /* Out-of-order CmdSN commands are handed off later instead. */
+               iscsit_stop_dataout_timer(cmd);
+               return (!ooo_cmdsn) ? transport_generic_handle_data(
+                                       &cmd->se_cmd) : 0;
+       } else /* DATAOUT_CANNOT_RECOVER */
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Process a received NOP-Out PDU (RFC 3720 section 10.18).  Three cases
+ * are handled:
+ *   - ITT valid, TTT reserved: a ping request expecting a NOP-In reply;
+ *   - ITT reserved (must be immediate): an ExpStatSN ack, no reply wanted;
+ *   - TTT valid: the initiator's response to our unsolicited NOP-In.
+ * Any ping data is received (with pad bytes and optional DataDigest) and
+ * attached to the allocated command so it can be echoed back.
+ *
+ * Returns 0 on success or for silently-dropped PDUs, -1 on fatal errors.
+ */
+static int iscsit_handle_nop_out(
+       struct iscsi_conn *conn,
+       unsigned char *buf)
+{
+       unsigned char *ping_data = NULL;
+       int cmdsn_ret, niov = 0, ret = 0, rx_got, rx_size;
+       u32 checksum, data_crc, padding = 0, payload_length;
+       u64 lun;
+       struct iscsi_cmd *cmd = NULL;
+       struct kvec *iov = NULL;
+       struct iscsi_nopout *hdr;
+
+       /* Convert big-endian wire fields to host byte order in place. */
+       hdr                     = (struct iscsi_nopout *) buf;
+       payload_length          = ntoh24(hdr->dlength);
+       lun                     = get_unaligned_le64(&hdr->lun);
+       hdr->itt                = be32_to_cpu(hdr->itt);
+       hdr->ttt                = be32_to_cpu(hdr->ttt);
+       hdr->cmdsn              = be32_to_cpu(hdr->cmdsn);
+       hdr->exp_statsn         = be32_to_cpu(hdr->exp_statsn);
+
+       /* A reserved ITT is only legal on an immediate NOP-Out. */
+       if ((hdr->itt == 0xFFFFFFFF) && !(hdr->opcode & ISCSI_OP_IMMEDIATE)) {
+               pr_err("NOPOUT ITT is reserved, but Immediate Bit is"
+                       " not set, protocol error.\n");
+               return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+                                       buf, conn);
+       }
+
+       if (payload_length > conn->conn_ops->MaxRecvDataSegmentLength) {
+               pr_err("NOPOUT Ping Data DataSegmentLength: %u is"
+                       " greater than MaxRecvDataSegmentLength: %u, protocol"
+                       " error.\n", payload_length,
+                       conn->conn_ops->MaxRecvDataSegmentLength);
+               return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+                                       buf, conn);
+       }
+
+       pr_debug("Got NOPOUT Ping %s ITT: 0x%08x, TTT: 0x%09x,"
+               " CmdSN: 0x%08x, ExpStatSN: 0x%08x, Length: %u\n",
+               (hdr->itt == 0xFFFFFFFF) ? "Response" : "Request",
+               hdr->itt, hdr->ttt, hdr->cmdsn, hdr->exp_statsn,
+               payload_length);
+       /*
+        * This is not a response to a Unsolicited NopIN, which means
+        * it can either be a NOPOUT ping request (with a valid ITT),
+        * or a NOPOUT not requesting a NOPIN (with a reserved ITT).
+        * Either way, make sure we allocate an struct iscsi_cmd, as both
+        * can contain ping data.
+        */
+       if (hdr->ttt == 0xFFFFFFFF) {
+               cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+               if (!cmd)
+                       return iscsit_add_reject(
+                                       ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+                                       1, buf, conn);
+
+               cmd->iscsi_opcode       = ISCSI_OP_NOOP_OUT;
+               cmd->i_state            = ISTATE_SEND_NOPIN;
+               cmd->immediate_cmd      = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ?
+                                               1 : 0);
+               conn->sess->init_task_tag = cmd->init_task_tag = hdr->itt;
+               cmd->targ_xfer_tag      = 0xFFFFFFFF;
+               cmd->cmd_sn             = hdr->cmdsn;
+               cmd->exp_stat_sn        = hdr->exp_statsn;
+               cmd->data_direction     = DMA_NONE;
+       }
+
+       /* Receive the optional ping data (plus padding and DataDigest). */
+       if (payload_length && (hdr->ttt == 0xFFFFFFFF)) {
+               rx_size = payload_length;
+               /* +1 byte so the data can be NUL-terminated for logging. */
+               ping_data = kzalloc(payload_length + 1, GFP_KERNEL);
+               if (!ping_data) {
+                       pr_err("Unable to allocate memory for"
+                               " NOPOUT ping data.\n");
+                       ret = -1;
+                       goto out;
+               }
+
+               iov = &cmd->iov_misc[0];
+               iov[niov].iov_base      = ping_data;
+               iov[niov++].iov_len     = payload_length;
+
+               /* Wire payloads are padded out to a 4-byte boundary. */
+               padding = ((-payload_length) & 3);
+               if (padding != 0) {
+                       pr_debug("Receiving %u additional bytes"
+                               " for padding.\n", padding);
+                       iov[niov].iov_base      = &cmd->pad_bytes;
+                       iov[niov++].iov_len     = padding;
+                       rx_size += padding;
+               }
+               if (conn->conn_ops->DataDigest) {
+                       iov[niov].iov_base      = &checksum;
+                       iov[niov++].iov_len     = ISCSI_CRC_LEN;
+                       rx_size += ISCSI_CRC_LEN;
+               }
+
+               rx_got = rx_data(conn, &cmd->iov_misc[0], niov, rx_size);
+               if (rx_got != rx_size) {
+                       ret = -1;
+                       goto out;
+               }
+
+               /* Verify the CRC32C DataDigest; ERL decides recoverability. */
+               if (conn->conn_ops->DataDigest) {
+                       iscsit_do_crypto_hash_buf(&conn->conn_rx_hash,
+                                       ping_data, payload_length,
+                                       padding, cmd->pad_bytes,
+                                       (u8 *)&data_crc);
+
+                       if (checksum != data_crc) {
+                               pr_err("Ping data CRC32C DataDigest"
+                               " 0x%08x does not match computed 0x%08x\n",
+                                       checksum, data_crc);
+                               if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+                                       pr_err("Unable to recover from"
+                                       " NOPOUT Ping DataCRC failure while in"
+                                               " ERL=0.\n");
+                                       ret = -1;
+                                       goto out;
+                               } else {
+                                       /*
+                                        * Silently drop this PDU and let the
+                                        * initiator plug the CmdSN gap.
+                                        */
+                                       pr_debug("Dropping NOPOUT"
+                                       " Command CmdSN: 0x%08x due to"
+                                       " DataCRC error.\n", hdr->cmdsn);
+                                       ret = 0;
+                                       goto out;
+                               }
+                       } else {
+                               pr_debug("Got CRC32C DataDigest"
+                               " 0x%08x for %u bytes of ping data.\n",
+                                       checksum, payload_length);
+                       }
+               }
+
+               ping_data[payload_length] = '\0';
+               /*
+                * Attach ping data to struct iscsi_cmd->buf_ptr.
+                */
+               cmd->buf_ptr = (void *)ping_data;
+               cmd->buf_ptr_size = payload_length;
+
+               pr_debug("Got %u bytes of NOPOUT ping"
+                       " data.\n", payload_length);
+               pr_debug("Ping Data: \"%s\"\n", ping_data);
+       }
+
+       if (hdr->itt != 0xFFFFFFFF) {
+               if (!cmd) {
+                       pr_err("Checking CmdSN for NOPOUT,"
+                               " but cmd is NULL!\n");
+                       return -1;
+               }
+               /*
+                * Initiator is expecting a NopIN ping reply,
+                */
+               spin_lock_bh(&conn->cmd_lock);
+               list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+               spin_unlock_bh(&conn->cmd_lock);
+
+               iscsit_ack_from_expstatsn(conn, hdr->exp_statsn);
+
+               /* Immediate pings skip CmdSN sequencing entirely. */
+               if (hdr->opcode & ISCSI_OP_IMMEDIATE) {
+                       iscsit_add_cmd_to_response_queue(cmd, conn,
+                                       cmd->i_state);
+                       return 0;
+               }
+
+               cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn);
+               if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) {
+                       ret = 0;
+                       goto ping_out;
+               }
+               if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER)
+                       return iscsit_add_reject_from_cmd(
+                                       ISCSI_REASON_PROTOCOL_ERROR,
+                                       1, 0, buf, cmd);
+
+               return 0;
+       }
+
+       if (hdr->ttt != 0xFFFFFFFF) {
+               /*
+                * This was a response to a unsolicited NOPIN ping.
+                */
+               cmd = iscsit_find_cmd_from_ttt(conn, hdr->ttt);
+               if (!cmd)
+                       return -1;
+
+               iscsit_stop_nopin_response_timer(conn);
+
+               cmd->i_state = ISTATE_REMOVE;
+               iscsit_add_cmd_to_immediate_queue(cmd, conn, cmd->i_state);
+               iscsit_start_nopin_timer(conn);
+       } else {
+               /*
+                * Initiator is not expecting a NOPIN is response.
+                * Just ignore for now.
+                *
+                * iSCSI v19-91 10.18
+                * "A NOP-OUT may also be used to confirm a changed
+                *  ExpStatSN if another PDU will not be available
+                *  for a long time."
+                */
+               ret = 0;
+               goto out;
+       }
+
+       return 0;
+out:
+       /*
+        * NOTE(review): once ping_data has been attached to cmd->buf_ptr,
+        * this path releases the command and then frees ping_data; confirm
+        * that iscsit_release_cmd() does not also free buf_ptr, otherwise
+        * the kfree() below would be a double-free.
+        */
+       if (cmd)
+               iscsit_release_cmd(cmd);
+ping_out:
+       kfree(ping_data);
+       return ret;
+}
+
+/*
+ * Process a received Task Management Function Request PDU (RFC 3720
+ * section 10.5): validate the requested function and its tags, allocate
+ * a TMR command, resolve the target LUN where required, dispatch the
+ * function-specific handler, and then either hand the TMR to the SE
+ * transport or queue the response for the tx thread.
+ *
+ * Returns 0 on success; protocol errors go through the reject helpers.
+ */
+static int iscsit_handle_task_mgt_cmd(
+       struct iscsi_conn *conn,
+       unsigned char *buf)
+{
+       struct iscsi_cmd *cmd;
+       struct se_tmr_req *se_tmr;
+       struct iscsi_tmr_req *tmr_req;
+       struct iscsi_tm *hdr;
+       u32 payload_length;
+       int out_of_order_cmdsn = 0;
+       int ret;
+       u8 function;
+
+       /* Convert big-endian wire fields to host byte order in place. */
+       hdr                     = (struct iscsi_tm *) buf;
+       payload_length          = ntoh24(hdr->dlength);
+       hdr->itt                = be32_to_cpu(hdr->itt);
+       hdr->rtt                = be32_to_cpu(hdr->rtt);
+       hdr->cmdsn              = be32_to_cpu(hdr->cmdsn);
+       hdr->exp_statsn         = be32_to_cpu(hdr->exp_statsn);
+       hdr->refcmdsn           = be32_to_cpu(hdr->refcmdsn);
+       hdr->exp_datasn         = be32_to_cpu(hdr->exp_datasn);
+       /* Mask off the F-bit so flags holds only the TMR function code. */
+       hdr->flags &= ~ISCSI_FLAG_CMD_FINAL;
+       function = hdr->flags;
+
+       pr_debug("Got Task Management Request ITT: 0x%08x, CmdSN:"
+               " 0x%08x, Function: 0x%02x, RefTaskTag: 0x%08x, RefCmdSN:"
+               " 0x%08x, CID: %hu\n", hdr->itt, hdr->cmdsn, function,
+               hdr->rtt, hdr->refcmdsn, conn->cid);
+
+       /*
+        * RefTaskTag is only meaningful for ABORT_TASK and TASK_REASSIGN;
+        * quietly normalize it to the reserved value otherwise.
+        */
+       if ((function != ISCSI_TM_FUNC_ABORT_TASK) &&
+           ((function != ISCSI_TM_FUNC_TASK_REASSIGN) &&
+            (hdr->rtt != ISCSI_RESERVED_TAG))) {
+               pr_err("RefTaskTag should be set to 0xFFFFFFFF.\n");
+               hdr->rtt = ISCSI_RESERVED_TAG;
+       }
+
+       if ((function == ISCSI_TM_FUNC_TASK_REASSIGN) &&
+                       !(hdr->opcode & ISCSI_OP_IMMEDIATE)) {
+               pr_err("Task Management Request TASK_REASSIGN not"
+                       " issued as immediate command, bad iSCSI Initiator"
+                               "implementation\n");
+               return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+                                       buf, conn);
+       }
+       /* RefCmdSN is only meaningful for ABORT_TASK; force reserved otherwise. */
+       if ((function != ISCSI_TM_FUNC_ABORT_TASK) &&
+           (hdr->refcmdsn != ISCSI_RESERVED_TAG))
+               hdr->refcmdsn = ISCSI_RESERVED_TAG;
+
+       cmd = iscsit_allocate_se_cmd_for_tmr(conn, function);
+       if (!cmd)
+               return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+                                       1, buf, conn);
+
+       cmd->iscsi_opcode       = ISCSI_OP_SCSI_TMFUNC;
+       cmd->i_state            = ISTATE_SEND_TASKMGTRSP;
+       cmd->immediate_cmd      = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 1 : 0);
+       cmd->init_task_tag      = hdr->itt;
+       cmd->targ_xfer_tag      = 0xFFFFFFFF;
+       cmd->cmd_sn             = hdr->cmdsn;
+       cmd->exp_stat_sn        = hdr->exp_statsn;
+       se_tmr                  = cmd->se_cmd.se_tmr_req;
+       tmr_req                 = cmd->tmr_req;
+       /*
+        * Locate the struct se_lun for all TMRs not related to ERL=2 TASK_REASSIGN
+        */
+       if (function != ISCSI_TM_FUNC_TASK_REASSIGN) {
+               ret = iscsit_get_lun_for_tmr(cmd,
+                               get_unaligned_le64(&hdr->lun));
+               if (ret < 0) {
+                       cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
+                       se_tmr->response = ISCSI_TMF_RSP_NO_LUN;
+                       goto attach;
+               }
+       }
+
+       /* Dispatch the function-specific handler; failures fill in response. */
+       switch (function) {
+       case ISCSI_TM_FUNC_ABORT_TASK:
+               se_tmr->response = iscsit_tmr_abort_task(cmd, buf);
+               if (se_tmr->response != ISCSI_TMF_RSP_COMPLETE) {
+                       cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
+                       goto attach;
+               }
+               break;
+       case ISCSI_TM_FUNC_ABORT_TASK_SET:
+       case ISCSI_TM_FUNC_CLEAR_ACA:
+       case ISCSI_TM_FUNC_CLEAR_TASK_SET:
+       case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET:
+               /* These are handled entirely by the SE transport below. */
+               break;
+       case ISCSI_TM_FUNC_TARGET_WARM_RESET:
+               if (iscsit_tmr_task_warm_reset(conn, tmr_req, buf) < 0) {
+                       cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
+                       se_tmr->response = ISCSI_TMF_RSP_AUTH_FAILED;
+                       goto attach;
+               }
+               break;
+       case ISCSI_TM_FUNC_TARGET_COLD_RESET:
+               if (iscsit_tmr_task_cold_reset(conn, tmr_req, buf) < 0) {
+                       cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
+                       se_tmr->response = ISCSI_TMF_RSP_AUTH_FAILED;
+                       goto attach;
+               }
+               break;
+       case ISCSI_TM_FUNC_TASK_REASSIGN:
+               se_tmr->response = iscsit_tmr_task_reassign(cmd, buf);
+               /*
+                * Perform sanity checks on the ExpDataSN only if the
+                * TASK_REASSIGN was successful.
+                */
+               if (se_tmr->response != ISCSI_TMF_RSP_COMPLETE)
+                       break;
+
+               if (iscsit_check_task_reassign_expdatasn(tmr_req, conn) < 0)
+                       return iscsit_add_reject_from_cmd(
+                                       ISCSI_REASON_BOOKMARK_INVALID, 1, 1,
+                                       buf, cmd);
+               break;
+       default:
+               pr_err("Unknown TMR function: 0x%02x, protocol"
+                       " error.\n", function);
+               cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
+               se_tmr->response = ISCSI_TMF_RSP_NOT_SUPPORTED;
+               goto attach;
+       }
+
+       if ((function != ISCSI_TM_FUNC_TASK_REASSIGN) &&
+           (se_tmr->response == ISCSI_TMF_RSP_COMPLETE))
+               se_tmr->call_transport = 1;
+attach:
+       spin_lock_bh(&conn->cmd_lock);
+       list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+       spin_unlock_bh(&conn->cmd_lock);
+
+       /* Non-immediate TMRs participate in normal CmdSN sequencing. */
+       if (!(hdr->opcode & ISCSI_OP_IMMEDIATE)) {
+               int cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn);
+               if (cmdsn_ret == CMDSN_HIGHER_THAN_EXP)
+                       out_of_order_cmdsn = 1;
+               else if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) {
+                       return 0;
+               } else { /* (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) */
+                       return iscsit_add_reject_from_cmd(
+                                       ISCSI_REASON_PROTOCOL_ERROR,
+                                       1, 0, buf, cmd);
+               }
+       }
+       iscsit_ack_from_expstatsn(conn, hdr->exp_statsn);
+
+       if (out_of_order_cmdsn)
+               return 0;
+       /*
+        * Found the referenced task, send to transport for processing.
+        */
+       if (se_tmr->call_transport)
+               return transport_generic_handle_tmr(&cmd->se_cmd);
+
+       /*
+        * Could not find the referenced LUN, task, or Task Management
+        * command not authorized or supported.  Change state and
+        * let the tx_thread send the response.
+        *
+        * For connection recovery, this is also the default action for
+        * TMR TASK_REASSIGN.
+        */
+       iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+       return 0;
+}
+
+/* #warning FIXME: Support Text Command parameters besides SendTargets */
+/*
+ * Process a received Text Request PDU (RFC 3720 section 10.10).  Only the
+ * "SendTargets=All" key is currently accepted: the text payload is
+ * received (with pad bytes and optional DataDigest), validated, and a
+ * Text Response command is allocated and queued for the tx thread.
+ *
+ * Returns 0 on success or for silently-dropped PDUs, -1 on fatal errors.
+ */
+static int iscsit_handle_text_cmd(
+       struct iscsi_conn *conn,
+       unsigned char *buf)
+{
+       char *text_ptr, *text_in;
+       int cmdsn_ret, niov = 0, rx_got, rx_size;
+       u32 checksum = 0, data_crc = 0, payload_length;
+       u32 padding = 0, text_length = 0;
+       struct iscsi_cmd *cmd;
+       struct kvec iov[3];
+       struct iscsi_text *hdr;
+       /*
+        * Local scratch area for pad bytes read off the wire.  The command
+        * descriptor is not allocated until after the payload has been
+        * received, so cmd->pad_bytes cannot be used here: the previous
+        * code dereferenced the still-uninitialized cmd pointer.
+        */
+       u8 pad_bytes[4];
+
+       /* Convert big-endian wire fields to host byte order in place. */
+       hdr                     = (struct iscsi_text *) buf;
+       payload_length          = ntoh24(hdr->dlength);
+       hdr->itt                = be32_to_cpu(hdr->itt);
+       hdr->ttt                = be32_to_cpu(hdr->ttt);
+       hdr->cmdsn              = be32_to_cpu(hdr->cmdsn);
+       hdr->exp_statsn         = be32_to_cpu(hdr->exp_statsn);
+
+       if (payload_length > conn->conn_ops->MaxRecvDataSegmentLength) {
+               pr_err("Unable to accept text parameter length: %u"
+                       "greater than MaxRecvDataSegmentLength %u.\n",
+                      payload_length, conn->conn_ops->MaxRecvDataSegmentLength);
+               return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+                                       buf, conn);
+       }
+
+       pr_debug("Got Text Request: ITT: 0x%08x, CmdSN: 0x%08x,"
+               " ExpStatSN: 0x%08x, Length: %u\n", hdr->itt, hdr->cmdsn,
+               hdr->exp_statsn, payload_length);
+
+       rx_size = text_length = payload_length;
+       if (text_length) {
+               text_in = kzalloc(text_length, GFP_KERNEL);
+               if (!text_in) {
+                       pr_err("Unable to allocate memory for"
+                               " incoming text parameters\n");
+                       return -1;
+               }
+
+               memset(iov, 0, sizeof(iov));
+               iov[niov].iov_base      = text_in;
+               iov[niov++].iov_len     = text_length;
+
+               /* Wire payloads are padded out to a 4-byte boundary. */
+               padding = ((-payload_length) & 3);
+               if (padding != 0) {
+                       iov[niov].iov_base = pad_bytes;
+                       iov[niov++].iov_len  = padding;
+                       rx_size += padding;
+                       pr_debug("Receiving %u additional bytes"
+                                       " for padding.\n", padding);
+               }
+               if (conn->conn_ops->DataDigest) {
+                       iov[niov].iov_base      = &checksum;
+                       iov[niov++].iov_len     = ISCSI_CRC_LEN;
+                       rx_size += ISCSI_CRC_LEN;
+               }
+
+               rx_got = rx_data(conn, &iov[0], niov, rx_size);
+               if (rx_got != rx_size) {
+                       kfree(text_in);
+                       return -1;
+               }
+
+               /* Verify the CRC32C DataDigest; ERL decides recoverability. */
+               if (conn->conn_ops->DataDigest) {
+                       iscsit_do_crypto_hash_buf(&conn->conn_rx_hash,
+                                       text_in, text_length,
+                                       padding, pad_bytes,
+                                       (u8 *)&data_crc);
+
+                       if (checksum != data_crc) {
+                               pr_err("Text data CRC32C DataDigest"
+                                       " 0x%08x does not match computed"
+                                       " 0x%08x\n", checksum, data_crc);
+                               if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+                                       pr_err("Unable to recover from"
+                                       " Text Data digest failure while in"
+                                               " ERL=0.\n");
+                                       kfree(text_in);
+                                       return -1;
+                               } else {
+                                       /*
+                                        * Silently drop this PDU and let the
+                                        * initiator plug the CmdSN gap.
+                                        */
+                                       pr_debug("Dropping Text"
+                                       " Command CmdSN: 0x%08x due to"
+                                       " DataCRC error.\n", hdr->cmdsn);
+                                       kfree(text_in);
+                                       return 0;
+                               }
+                       } else {
+                               pr_debug("Got CRC32C DataDigest"
+                                       " 0x%08x for %u bytes of text data.\n",
+                                               checksum, text_length);
+                       }
+               }
+               text_in[text_length - 1] = '\0';
+               pr_debug("Successfully read %d bytes of text"
+                               " data.\n", text_length);
+
+               /* Only the "SendTargets=All" key is supported for now. */
+               if (strncmp("SendTargets", text_in, 11) != 0) {
+                       pr_err("Received Text Data that is not"
+                               " SendTargets, cannot continue.\n");
+                       kfree(text_in);
+                       return -1;
+               }
+               text_ptr = strchr(text_in, '=');
+               if (!text_ptr) {
+                       pr_err("No \"=\" separator found in Text Data,"
+                               "  cannot continue.\n");
+                       kfree(text_in);
+                       return -1;
+               }
+               if (strncmp("=All", text_ptr, 4) != 0) {
+                       pr_err("Unable to locate All value for"
+                               " SendTargets key,  cannot continue.\n");
+                       kfree(text_in);
+                       return -1;
+               }
+/*#warning Support SendTargets=(iSCSI Target Name/Nothing) values. */
+               kfree(text_in);
+       }
+
+       cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+       if (!cmd)
+               return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+                                       1, buf, conn);
+
+       cmd->iscsi_opcode       = ISCSI_OP_TEXT;
+       cmd->i_state            = ISTATE_SEND_TEXTRSP;
+       cmd->immediate_cmd      = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 1 : 0);
+       conn->sess->init_task_tag = cmd->init_task_tag  = hdr->itt;
+       cmd->targ_xfer_tag      = 0xFFFFFFFF;
+       cmd->cmd_sn             = hdr->cmdsn;
+       cmd->exp_stat_sn        = hdr->exp_statsn;
+       cmd->data_direction     = DMA_NONE;
+
+       spin_lock_bh(&conn->cmd_lock);
+       list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+       spin_unlock_bh(&conn->cmd_lock);
+
+       iscsit_ack_from_expstatsn(conn, hdr->exp_statsn);
+
+       /* Non-immediate requests participate in normal CmdSN sequencing. */
+       if (!(hdr->opcode & ISCSI_OP_IMMEDIATE)) {
+               cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn);
+               if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER)
+                       return iscsit_add_reject_from_cmd(
+                                       ISCSI_REASON_PROTOCOL_ERROR,
+                                       1, 0, buf, cmd);
+
+               return 0;
+       }
+
+       return iscsit_execute_cmd(cmd, 0);
+}
+
+/*
+ * Handle a Logout Request with reason code CLOSESESSION.
+ *
+ * Flags the session and the receiving connection for logout, pins both with
+ * usage-count references (dropped later by the logout post-handler —
+ * NOTE(review): confirm against iscsit_logout_post_handler), moves every
+ * logged-in connection of the session to TARG_CONN_STATE_IN_LOGOUT under
+ * sess->conn_lock, and queues the Logout Response.  Always returns 0.
+ */
+int iscsit_logout_closesession(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
+{
+       struct iscsi_conn *conn_p;
+       struct iscsi_session *sess = conn->sess;
+
+       pr_debug("Received logout request CLOSESESSION on CID: %hu"
+               " for SID: %u.\n", conn->cid, conn->sess->sid);
+
+       atomic_set(&sess->session_logout, 1);
+       atomic_set(&conn->conn_logout_remove, 1);
+       conn->conn_logout_reason = ISCSI_LOGOUT_REASON_CLOSE_SESSION;
+
+       /* Pin the connection and session while the logout is in flight. */
+       iscsit_inc_conn_usage_count(conn);
+       iscsit_inc_session_usage_count(sess);
+
+       spin_lock_bh(&sess->conn_lock);
+       list_for_each_entry(conn_p, &sess->sess_conn_list, conn_list) {
+               if (conn_p->conn_state != TARG_CONN_STATE_LOGGED_IN)
+                       continue;
+
+               pr_debug("Moving to TARG_CONN_STATE_IN_LOGOUT.\n");
+               conn_p->conn_state = TARG_CONN_STATE_IN_LOGOUT;
+       }
+       spin_unlock_bh(&sess->conn_lock);
+
+       /* Queue the Logout Response for the tx thread. */
+       iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+
+       return 0;
+}
+
+/*
+ * Handle a Logout Request with reason code CLOSECONNECTION.
+ *
+ * If the logout CID matches the receiving connection, mark this connection
+ * as in-logout and pin it with a usage-count reference.  If the CID differs,
+ * only validate that the target CID still exists here; the actual teardown
+ * happens later in iscsit_logout_post_handler_diffcid().  In either case the
+ * Logout Response is queued and 0 is returned.
+ */
+int iscsit_logout_closeconnection(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
+{
+       struct iscsi_conn *l_conn;
+       struct iscsi_session *sess = conn->sess;
+
+       pr_debug("Received logout request CLOSECONNECTION for CID:"
+               " %hu on CID: %hu.\n", cmd->logout_cid, conn->cid);
+
+       /*
+        * A Logout Request with a CLOSECONNECTION reason code for a CID
+        * can arrive on a connection with a differing CID.
+        */
+       if (conn->cid == cmd->logout_cid) {
+               spin_lock_bh(&conn->state_lock);
+               pr_debug("Moving to TARG_CONN_STATE_IN_LOGOUT.\n");
+               conn->conn_state = TARG_CONN_STATE_IN_LOGOUT;
+
+               atomic_set(&conn->conn_logout_remove, 1);
+               conn->conn_logout_reason = ISCSI_LOGOUT_REASON_CLOSE_CONNECTION;
+               iscsit_inc_conn_usage_count(conn);
+
+               spin_unlock_bh(&conn->state_lock);
+       } else {
+               /*
+                * Handle all different cid CLOSECONNECTION requests in
+                * iscsit_logout_post_handler_diffcid() as to give enough
+                * time for any non immediate command's CmdSN to be
+                * acknowledged on the connection in question.
+                *
+                * Here we simply make sure the CID is still around.
+                */
+               l_conn = iscsit_get_conn_from_cid(sess,
+                               cmd->logout_cid);
+               if (!l_conn) {
+                       /* Target CID already gone: report CID_NOT_FOUND. */
+                       cmd->logout_response = ISCSI_LOGOUT_CID_NOT_FOUND;
+                       iscsit_add_cmd_to_response_queue(cmd, conn,
+                                       cmd->i_state);
+                       return 0;
+               }
+
+               /* Lookup took a reference; drop it, existence is confirmed. */
+               iscsit_dec_conn_usage_count(l_conn);
+       }
+
+       iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+
+       return 0;
+}
+
+/*
+ * Handle a Logout Request with reason code REMOVECONNFORRECOVERY.
+ *
+ * Valid only with ErrorRecoveryLevel=2 and only for a CID other than the
+ * receiving connection's own; violations set an error logout_response
+ * (RECOVERY_UNSUPPORTED or CLEANUP_FAILED).  The Logout Response is queued
+ * in every path and 0 is always returned.
+ */
+int iscsit_logout_removeconnforrecovery(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
+{
+       struct iscsi_session *sess = conn->sess;
+
+       pr_debug("Received explicit REMOVECONNFORRECOVERY logout for"
+               " CID: %hu on CID: %hu.\n", cmd->logout_cid, conn->cid);
+
+       if (sess->sess_ops->ErrorRecoveryLevel != 2) {
+               pr_err("Received Logout Request REMOVECONNFORRECOVERY"
+                       " while ERL!=2.\n");
+               cmd->logout_response = ISCSI_LOGOUT_RECOVERY_UNSUPPORTED;
+               iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+               return 0;
+       }
+
+       if (conn->cid == cmd->logout_cid) {
+               /* An initiator may not recover the connection it is using. */
+               pr_err("Received Logout Request REMOVECONNFORRECOVERY"
+                       " with CID: %hu on CID: %hu, implementation error.\n",
+                               cmd->logout_cid, conn->cid);
+               cmd->logout_response = ISCSI_LOGOUT_CLEANUP_FAILED;
+               iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+               return 0;
+       }
+
+       iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+
+       return 0;
+}
+
+/*
+ * Receive-path handler for an iSCSI Logout Request PDU.
+ *
+ * Byte-swaps the header, updates per-tiqn logout statistics, allocates and
+ * initializes the struct iscsi_cmd, and dispatches execution: immediate
+ * logouts run right away, non-immediate ones go through CmdSN sequencing.
+ *
+ * Returns 1 when the rx thread must sleep until the Logout Response is sent
+ * (CLOSESESSION, or CLOSECONNECTION for our own CID), 0 otherwise, or a
+ * reject/negative value on error.
+ */
+static int iscsit_handle_logout_cmd(
+       struct iscsi_conn *conn,
+       unsigned char *buf)
+{
+       int cmdsn_ret, logout_remove = 0;
+       u8 reason_code = 0;
+       struct iscsi_cmd *cmd;
+       struct iscsi_logout *hdr;
+       struct iscsi_tiqn *tiqn = iscsit_snmp_get_tiqn(conn);
+
+       hdr                     = (struct iscsi_logout *) buf;
+       /* Low 7 bits carry the logout reason code. */
+       reason_code             = (hdr->flags & 0x7f);
+       hdr->itt                = be32_to_cpu(hdr->itt);
+       hdr->cid                = be16_to_cpu(hdr->cid);
+       hdr->cmdsn              = be32_to_cpu(hdr->cmdsn);
+       hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn);
+
+       if (tiqn) {
+               /* SNMP-style counters: CLOSE_SESSION is "normal". */
+               spin_lock(&tiqn->logout_stats.lock);
+               if (reason_code == ISCSI_LOGOUT_REASON_CLOSE_SESSION)
+                       tiqn->logout_stats.normal_logouts++;
+               else
+                       tiqn->logout_stats.abnormal_logouts++;
+               spin_unlock(&tiqn->logout_stats.lock);
+       }
+
+       pr_debug("Got Logout Request ITT: 0x%08x CmdSN: 0x%08x"
+               " ExpStatSN: 0x%08x Reason: 0x%02x CID: %hu on CID: %hu\n",
+               hdr->itt, hdr->cmdsn, hdr->exp_statsn, reason_code,
+               hdr->cid, conn->cid);
+
+       if (conn->conn_state != TARG_CONN_STATE_LOGGED_IN) {
+               pr_err("Received logout request on connection that"
+                       " is not in logged in state, ignoring request.\n");
+               return 0;
+       }
+
+       cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+       if (!cmd)
+               return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, 1,
+                                       buf, conn);
+
+       cmd->iscsi_opcode       = ISCSI_OP_LOGOUT;
+       cmd->i_state            = ISTATE_SEND_LOGOUTRSP;
+       cmd->immediate_cmd      = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 1 : 0);
+       conn->sess->init_task_tag = cmd->init_task_tag  = hdr->itt;
+       cmd->targ_xfer_tag      = 0xFFFFFFFF;
+       cmd->cmd_sn             = hdr->cmdsn;
+       cmd->exp_stat_sn        = hdr->exp_statsn;
+       cmd->logout_cid         = hdr->cid;
+       cmd->logout_reason      = reason_code;
+       cmd->data_direction     = DMA_NONE;
+
+       /*
+        * We need to sleep in these cases (by returning 1) until the Logout
+        * Response gets sent in the tx thread.
+        */
+       if ((reason_code == ISCSI_LOGOUT_REASON_CLOSE_SESSION) ||
+          ((reason_code == ISCSI_LOGOUT_REASON_CLOSE_CONNECTION) &&
+           (hdr->cid == conn->cid)))
+               logout_remove = 1;
+
+       spin_lock_bh(&conn->cmd_lock);
+       list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+       spin_unlock_bh(&conn->cmd_lock);
+
+       if (reason_code != ISCSI_LOGOUT_REASON_RECOVERY)
+               iscsit_ack_from_expstatsn(conn, hdr->exp_statsn);
+
+       /*
+        * Immediate commands are executed, well, immediately.
+        * Non-Immediate Logout Commands are executed in CmdSN order.
+        */
+       if (hdr->opcode & ISCSI_OP_IMMEDIATE) {
+               int ret = iscsit_execute_cmd(cmd, 0);
+
+               if (ret < 0)
+                       return ret;
+       } else {
+               cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn);
+               if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) {
+                       /* Retransmission of an already-seen CmdSN: no sleep. */
+                       logout_remove = 0;
+               } else if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) {
+                       return iscsit_add_reject_from_cmd(
+                               ISCSI_REASON_PROTOCOL_ERROR,
+                               1, 0, buf, cmd);
+               }
+       }
+
+       return logout_remove;
+}
+
+/*
+ * Receive-path handler for an iSCSI SNACK Request PDU.
+ *
+ * Byte-swaps the header and dispatches on the SNACK type bits.  SNACK is
+ * only legal when ErrorRecoveryLevel > 0; otherwise, and for unsupported
+ * (R-Data) or unknown types, the PDU is rejected as a protocol error.
+ *
+ * Returns the result of the per-type handler or reject path.
+ */
+static int iscsit_handle_snack(
+       struct iscsi_conn *conn,
+       unsigned char *buf)
+{
+       u32 unpacked_lun;
+       u64 lun;
+       struct iscsi_snack *hdr;
+
+       hdr                     = (struct iscsi_snack *) buf;
+       hdr->flags              &= ~ISCSI_FLAG_CMD_FINAL;
+       lun                     = get_unaligned_le64(&hdr->lun);
+       unpacked_lun            = scsilun_to_int((struct scsi_lun *)&lun);
+       hdr->itt                = be32_to_cpu(hdr->itt);
+       hdr->ttt                = be32_to_cpu(hdr->ttt);
+       hdr->exp_statsn         = be32_to_cpu(hdr->exp_statsn);
+       hdr->begrun             = be32_to_cpu(hdr->begrun);
+       hdr->runlength          = be32_to_cpu(hdr->runlength);
+
+       pr_debug("Got ISCSI_INIT_SNACK, ITT: 0x%08x, ExpStatSN:"
+               " 0x%08x, Type: 0x%02x, BegRun: 0x%08x, RunLength: 0x%08x,"
+               " CID: %hu\n", hdr->itt, hdr->exp_statsn, hdr->flags,
+                       hdr->begrun, hdr->runlength, conn->cid);
+
+       if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+               pr_err("Initiator sent SNACK request while in"
+                       " ErrorRecoveryLevel=0.\n");
+               return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+                                       buf, conn);
+       }
+       /*
+        * SNACK_DATA and SNACK_R2T are both 0,  so check which function to
+        * call from inside iscsi_send_recovery_datain_or_r2t().
+        */
+       switch (hdr->flags & ISCSI_FLAG_SNACK_TYPE_MASK) {
+       case 0:
+               /*
+                * Dropped the unreachable "return 0;" that followed this
+                * return statement (dead code).
+                */
+               return iscsit_handle_recovery_datain_or_r2t(conn, buf,
+                       hdr->itt, hdr->ttt, hdr->begrun, hdr->runlength);
+       case ISCSI_FLAG_SNACK_TYPE_STATUS:
+               return iscsit_handle_status_snack(conn, hdr->itt, hdr->ttt,
+                       hdr->begrun, hdr->runlength);
+       case ISCSI_FLAG_SNACK_TYPE_DATA_ACK:
+               return iscsit_handle_data_ack(conn, hdr->ttt, hdr->begrun,
+                       hdr->runlength);
+       case ISCSI_FLAG_SNACK_TYPE_RDATA:
+               /* FIXME: Support R-Data SNACK */
+               pr_err("R-Data SNACK Not Supported.\n");
+               return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+                                       buf, conn);
+       default:
+               pr_err("Unknown SNACK type 0x%02x, protocol"
+                       " error.\n", hdr->flags & 0x0f);
+               return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+                                       buf, conn);
+       }
+
+       return 0;
+}
+
+/*
+ * If the TCP socket has been shut down in either direction, park the rx
+ * thread on rx_half_close_comp (interruptible, bounded by
+ * ISCSI_RX_THREAD_TCP_TIMEOUT seconds) so connection teardown can proceed.
+ */
+static void iscsit_rx_thread_wait_for_tcp(struct iscsi_conn *conn)
+{
+       if ((conn->sock->sk->sk_shutdown & SEND_SHUTDOWN) ||
+           (conn->sock->sk->sk_shutdown & RCV_SHUTDOWN)) {
+               wait_for_completion_interruptible_timeout(
+                                       &conn->rx_half_close_comp,
+                                       ISCSI_RX_THREAD_TCP_TIMEOUT * HZ);
+       }
+}
+
+/*
+ * Receive immediate data for a WRITE command from the socket into the
+ * command's mapped iovecs, including pad bytes and an optional trailing
+ * DataDigest, then verify the digest.
+ *
+ * Returns IMMEDIATE_DATA_NORMAL_OPERATION on success,
+ * IMMEDIATE_DATA_ERL1_CRC_FAILURE on a recoverable digest failure (ERL>0),
+ * or IMMEDIATE_DATA_CANNOT_RECOVER on mapping/rx/digest failure at ERL=0.
+ */
+static int iscsit_handle_immediate_data(
+       struct iscsi_cmd *cmd,
+       unsigned char *buf,
+       u32 length)
+{
+       int iov_ret, rx_got = 0, rx_size = 0;
+       u32 checksum, iov_count = 0, padding = 0;
+       struct iscsi_conn *conn = cmd->conn;
+       struct kvec *iov;
+
+       iov_ret = iscsit_map_iovec(cmd, cmd->iov_data, cmd->write_data_done, length);
+       if (iov_ret < 0)
+               return IMMEDIATE_DATA_CANNOT_RECOVER;
+
+       rx_size = length;
+       iov_count = iov_ret;
+       iov = &cmd->iov_data[0];
+
+       /* Pad the data segment up to the next 4-byte boundary. */
+       padding = ((-length) & 3);
+       if (padding != 0) {
+               iov[iov_count].iov_base = cmd->pad_bytes;
+               iov[iov_count++].iov_len = padding;
+               rx_size += padding;
+       }
+
+       if (conn->conn_ops->DataDigest) {
+               /* Receive the 4-byte CRC32C digest into a local. */
+               iov[iov_count].iov_base         = &checksum;
+               iov[iov_count++].iov_len        = ISCSI_CRC_LEN;
+               rx_size += ISCSI_CRC_LEN;
+       }
+
+       rx_got = rx_data(conn, &cmd->iov_data[0], iov_count, rx_size);
+
+       iscsit_unmap_iovec(cmd);
+
+       if (rx_got != rx_size) {
+               /* Short read: wait out a half-closed socket, then bail. */
+               iscsit_rx_thread_wait_for_tcp(conn);
+               return IMMEDIATE_DATA_CANNOT_RECOVER;
+       }
+
+       if (conn->conn_ops->DataDigest) {
+               u32 data_crc;
+
+               data_crc = iscsit_do_crypto_hash_sg(&conn->conn_rx_hash, cmd,
+                                                   cmd->write_data_done, length, padding,
+                                                   cmd->pad_bytes);
+
+               if (checksum != data_crc) {
+                       pr_err("ImmediateData CRC32C DataDigest 0x%08x"
+                               " does not match computed 0x%08x\n", checksum,
+                               data_crc);
+
+                       if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+                               /* ERL=0: no digest recovery possible. */
+                               pr_err("Unable to recover from"
+                                       " Immediate Data digest failure while"
+                                       " in ERL=0.\n");
+                               iscsit_add_reject_from_cmd(
+                                               ISCSI_REASON_DATA_DIGEST_ERROR,
+                                               1, 0, buf, cmd);
+                               return IMMEDIATE_DATA_CANNOT_RECOVER;
+                       } else {
+                               /* ERL>0: reject but let recovery retransmit. */
+                               iscsit_add_reject_from_cmd(
+                                               ISCSI_REASON_DATA_DIGEST_ERROR,
+                                               0, 0, buf, cmd);
+                               return IMMEDIATE_DATA_ERL1_CRC_FAILURE;
+                       }
+               } else {
+                       pr_debug("Got CRC32C DataDigest 0x%08x for"
+                               " %u bytes of Immediate Data\n", checksum,
+                               length);
+               }
+       }
+
+       cmd->write_data_done += length;
+
+       if (cmd->write_data_done == cmd->data_length) {
+               /* All WRITE data received; advance the command state. */
+               spin_lock_bh(&cmd->istate_lock);
+               cmd->cmd_flags |= ICF_GOT_LAST_DATAOUT;
+               cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT;
+               spin_unlock_bh(&cmd->istate_lock);
+       }
+
+       return IMMEDIATE_DATA_NORMAL_OPERATION;
+}
+
+/*
+ *     Called with sess->conn_lock held.
+ */
+/* #warning iscsi_build_conn_drop_async_message() only sends out on connections
+       with active network interface */
+static void iscsit_build_conn_drop_async_message(struct iscsi_conn *conn)
+{
+       struct iscsi_cmd *cmd;
+       struct iscsi_conn *conn_p;
+
+       /*
+        * Only send a Asynchronous Message on connections whos network
+        * interface is still functional.
+        */
+       list_for_each_entry(conn_p, &conn->sess->sess_conn_list, conn_list) {
+               if (conn_p->conn_state == TARG_CONN_STATE_LOGGED_IN) {
+                       iscsit_inc_conn_usage_count(conn_p);
+                       break;
+               }
+       }
+
+       if (!conn_p)
+               return;
+
+       cmd = iscsit_allocate_cmd(conn_p, GFP_KERNEL);
+       if (!cmd) {
+               iscsit_dec_conn_usage_count(conn_p);
+               return;
+       }
+
+       cmd->logout_cid = conn->cid;
+       cmd->iscsi_opcode = ISCSI_OP_ASYNC_EVENT;
+       cmd->i_state = ISTATE_SEND_ASYNCMSG;
+
+       spin_lock_bh(&conn_p->cmd_lock);
+       list_add_tail(&cmd->i_list, &conn_p->conn_cmd_list);
+       spin_unlock_bh(&conn_p->cmd_lock);
+
+       iscsit_add_cmd_to_response_queue(cmd, conn_p, cmd->i_state);
+       iscsit_dec_conn_usage_count(conn_p);
+}
+
+/*
+ * Build the Asynchronous Message PDU (DROPPING_CONNECTION) announcing that
+ * the connection identified by cmd->logout_cid is being dropped, including
+ * Time2Wait/Time2Retain parameters and an optional HeaderDigest, and stage
+ * it in cmd->iov_misc for transmission.  Returns 0.
+ */
+static int iscsit_send_conn_drop_async_message(
+       struct iscsi_cmd *cmd,
+       struct iscsi_conn *conn)
+{
+       struct iscsi_async *hdr;
+
+       cmd->tx_size = ISCSI_HDR_LEN;
+       cmd->iscsi_opcode = ISCSI_OP_ASYNC_EVENT;
+
+       hdr                     = (struct iscsi_async *) cmd->pdu;
+       hdr->opcode             = ISCSI_OP_ASYNC_EVENT;
+       hdr->flags              = ISCSI_FLAG_CMD_FINAL;
+       /* Async messages carry reserved ITT/TTT and an all-ones LUN field. */
+       cmd->init_task_tag      = 0xFFFFFFFF;
+       cmd->targ_xfer_tag      = 0xFFFFFFFF;
+       put_unaligned_be64(0xFFFFFFFFFFFFFFFFULL, &hdr->rsvd4[0]);
+       cmd->stat_sn            = conn->stat_sn++;
+       hdr->statsn             = cpu_to_be32(cmd->stat_sn);
+       hdr->exp_cmdsn          = cpu_to_be32(conn->sess->exp_cmd_sn);
+       hdr->max_cmdsn          = cpu_to_be32(conn->sess->max_cmd_sn);
+       hdr->async_event        = ISCSI_ASYNC_MSG_DROPPING_CONNECTION;
+       hdr->param1             = cpu_to_be16(cmd->logout_cid);
+       hdr->param2             = cpu_to_be16(conn->sess->sess_ops->DefaultTime2Wait);
+       hdr->param3             = cpu_to_be16(conn->sess->sess_ops->DefaultTime2Retain);
+
+       if (conn->conn_ops->HeaderDigest) {
+               /* CRC32C digest is placed directly after the 48-byte header. */
+               u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+               iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+                               (unsigned char *)hdr, ISCSI_HDR_LEN,
+                               0, NULL, (u8 *)header_digest);
+
+               cmd->tx_size += ISCSI_CRC_LEN;
+               pr_debug("Attaching CRC32C HeaderDigest to"
+                       " Async Message 0x%08x\n", *header_digest);
+       }
+
+       cmd->iov_misc[0].iov_base       = cmd->pdu;
+       cmd->iov_misc[0].iov_len        = cmd->tx_size;
+       cmd->iov_misc_count             = 1;
+
+       pr_debug("Sending Connection Dropped Async Message StatSN:"
+               " 0x%08x, for CID: %hu on CID: %hu\n", cmd->stat_sn,
+                       cmd->logout_cid, conn->cid);
+       return 0;
+}
+
+/*
+ * Build one DataIN PDU for a READ command: fetch the next datain window,
+ * fill in the header (flags, residuals, sequence numbers), map the payload
+ * into cmd->iov_data, and append padding plus optional Header/Data digests.
+ *
+ * On the final PDU of the sequence *eodr is set to 1 (or 2 when sense data
+ * must follow) and the datain request is freed.  Returns 0 on success, -1
+ * on datain lookup failure, bounds violation, or iovec mapping failure.
+ */
+static int iscsit_send_data_in(
+       struct iscsi_cmd *cmd,
+       struct iscsi_conn *conn,
+       int *eodr)
+{
+       int iov_ret = 0, set_statsn = 0;
+       u32 iov_count = 0, tx_size = 0;
+       struct iscsi_datain datain;
+       struct iscsi_datain_req *dr;
+       struct iscsi_data_rsp *hdr;
+       struct kvec *iov;
+
+       memset(&datain, 0, sizeof(struct iscsi_datain));
+       dr = iscsit_get_datain_values(cmd, &datain);
+       if (!dr) {
+               pr_err("iscsit_get_datain_values failed for ITT: 0x%08x\n",
+                               cmd->init_task_tag);
+               return -1;
+       }
+
+       /*
+        * Be paranoid and double check the logic for now.
+        */
+       if ((datain.offset + datain.length) > cmd->data_length) {
+               pr_err("Command ITT: 0x%08x, datain.offset: %u and"
+                       " datain.length: %u exceeds cmd->data_length: %u\n",
+                       cmd->init_task_tag, datain.offset, datain.length,
+                               cmd->data_length);
+               return -1;
+       }
+
+       /* Account transmitted octets per session and per node ACL. */
+       spin_lock_bh(&conn->sess->session_stats_lock);
+       conn->sess->tx_data_octets += datain.length;
+       if (conn->sess->se_sess->se_node_acl) {
+               spin_lock(&conn->sess->se_sess->se_node_acl->stats_lock);
+               conn->sess->se_sess->se_node_acl->read_bytes += datain.length;
+               spin_unlock(&conn->sess->se_sess->se_node_acl->stats_lock);
+       }
+       spin_unlock_bh(&conn->sess->session_stats_lock);
+       /*
+        * Special case for successfully execution w/ both DATAIN
+        * and Sense Data.
+        */
+       if ((datain.flags & ISCSI_FLAG_DATA_STATUS) &&
+           (cmd->se_cmd.se_cmd_flags & SCF_TRANSPORT_TASK_SENSE))
+               datain.flags &= ~ISCSI_FLAG_DATA_STATUS;
+       else {
+               if ((dr->dr_complete == DATAIN_COMPLETE_NORMAL) ||
+                   (dr->dr_complete == DATAIN_COMPLETE_CONNECTION_RECOVERY)) {
+                       iscsit_increment_maxcmdsn(cmd, conn->sess);
+                       cmd->stat_sn = conn->stat_sn++;
+                       set_statsn = 1;
+               } else if (dr->dr_complete ==
+                               DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY)
+                       set_statsn = 1;
+       }
+
+       hdr     = (struct iscsi_data_rsp *) cmd->pdu;
+       memset(hdr, 0, ISCSI_HDR_LEN);
+       hdr->opcode             = ISCSI_OP_SCSI_DATA_IN;
+       hdr->flags              = datain.flags;
+       if (hdr->flags & ISCSI_FLAG_DATA_STATUS) {
+               /* Status piggybacked: report over/underflow residuals. */
+               if (cmd->se_cmd.se_cmd_flags & SCF_OVERFLOW_BIT) {
+                       hdr->flags |= ISCSI_FLAG_DATA_OVERFLOW;
+                       hdr->residual_count = cpu_to_be32(cmd->residual_count);
+               } else if (cmd->se_cmd.se_cmd_flags & SCF_UNDERFLOW_BIT) {
+                       hdr->flags |= ISCSI_FLAG_DATA_UNDERFLOW;
+                       hdr->residual_count = cpu_to_be32(cmd->residual_count);
+               }
+       }
+       hton24(hdr->dlength, datain.length);
+       if (hdr->flags & ISCSI_FLAG_DATA_ACK)
+               int_to_scsilun(cmd->se_cmd.orig_fe_lun,
+                               (struct scsi_lun *)&hdr->lun);
+       else
+               put_unaligned_le64(0xFFFFFFFFFFFFFFFFULL, &hdr->lun);
+
+       /* TTT only carries a real tag when the A-bit requests a DataACK. */
+       hdr->itt                = cpu_to_be32(cmd->init_task_tag);
+       hdr->ttt                = (hdr->flags & ISCSI_FLAG_DATA_ACK) ?
+                                  cpu_to_be32(cmd->targ_xfer_tag) :
+                                  0xFFFFFFFF;
+       hdr->statsn             = (set_statsn) ? cpu_to_be32(cmd->stat_sn) :
+                                               0xFFFFFFFF;
+       hdr->exp_cmdsn          = cpu_to_be32(conn->sess->exp_cmd_sn);
+       hdr->max_cmdsn          = cpu_to_be32(conn->sess->max_cmd_sn);
+       hdr->datasn             = cpu_to_be32(datain.data_sn);
+       hdr->offset             = cpu_to_be32(datain.offset);
+
+       /* iov[0] is the header (plus optional digest); payload follows. */
+       iov = &cmd->iov_data[0];
+       iov[iov_count].iov_base = cmd->pdu;
+       iov[iov_count++].iov_len        = ISCSI_HDR_LEN;
+       tx_size += ISCSI_HDR_LEN;
+
+       if (conn->conn_ops->HeaderDigest) {
+               u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+               iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+                               (unsigned char *)hdr, ISCSI_HDR_LEN,
+                               0, NULL, (u8 *)header_digest);
+
+               iov[0].iov_len += ISCSI_CRC_LEN;
+               tx_size += ISCSI_CRC_LEN;
+
+               pr_debug("Attaching CRC32 HeaderDigest"
+                       " for DataIN PDU 0x%08x\n", *header_digest);
+       }
+
+       iov_ret = iscsit_map_iovec(cmd, &cmd->iov_data[1], datain.offset, datain.length);
+       if (iov_ret < 0)
+               return -1;
+
+       iov_count += iov_ret;
+       tx_size += datain.length;
+
+       /* Pad the data segment up to the next 4-byte boundary. */
+       cmd->padding = ((-datain.length) & 3);
+       if (cmd->padding) {
+               iov[iov_count].iov_base         = cmd->pad_bytes;
+               iov[iov_count++].iov_len        = cmd->padding;
+               tx_size += cmd->padding;
+
+               pr_debug("Attaching %u padding bytes\n",
+                               cmd->padding);
+       }
+       if (conn->conn_ops->DataDigest) {
+               cmd->data_crc = iscsit_do_crypto_hash_sg(&conn->conn_tx_hash, cmd,
+                        datain.offset, datain.length, cmd->padding, cmd->pad_bytes);
+
+               iov[iov_count].iov_base = &cmd->data_crc;
+               iov[iov_count++].iov_len = ISCSI_CRC_LEN;
+               tx_size += ISCSI_CRC_LEN;
+
+               pr_debug("Attached CRC32C DataDigest %d bytes, crc"
+                       " 0x%08x\n", datain.length+cmd->padding, cmd->data_crc);
+       }
+
+       cmd->iov_data_count = iov_count;
+       cmd->tx_size = tx_size;
+
+       pr_debug("Built DataIN ITT: 0x%08x, StatSN: 0x%08x,"
+               " DataSN: 0x%08x, Offset: %u, Length: %u, CID: %hu\n",
+               cmd->init_task_tag, ntohl(hdr->statsn), ntohl(hdr->datasn),
+               ntohl(hdr->offset), datain.length, conn->cid);
+
+       if (dr->dr_complete) {
+               /* Final PDU: tell the caller (2 when sense data follows). */
+               *eodr = (cmd->se_cmd.se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) ?
+                               2 : 1;
+               iscsit_free_datain_req(cmd, dr);
+       }
+
+       return 0;
+}
+
+/*
+ * Build the Logout Response PDU: resolve cmd->logout_response per the
+ * original logout reason code (including the ERL=2 RECOVERY path, which
+ * forces reinstatement of a still-active connection and discards stale
+ * recovery commands), then fill the header, optional HeaderDigest, and
+ * cmd->iov_misc.  Returns 0 on success, -1 on an unknown reason code.
+ */
+static int iscsit_send_logout_response(
+       struct iscsi_cmd *cmd,
+       struct iscsi_conn *conn)
+{
+       int niov = 0, tx_size;
+       struct iscsi_conn *logout_conn = NULL;
+       struct iscsi_conn_recovery *cr = NULL;
+       struct iscsi_session *sess = conn->sess;
+       struct kvec *iov;
+       struct iscsi_logout_rsp *hdr;
+       /*
+        * The actual shutting down of Sessions and/or Connections
+        * for CLOSESESSION and CLOSECONNECTION Logout Requests
+        * is done in scsi_logout_post_handler().
+        */
+       switch (cmd->logout_reason) {
+       case ISCSI_LOGOUT_REASON_CLOSE_SESSION:
+               pr_debug("iSCSI session logout successful, setting"
+                       " logout response to ISCSI_LOGOUT_SUCCESS.\n");
+               cmd->logout_response = ISCSI_LOGOUT_SUCCESS;
+               break;
+       case ISCSI_LOGOUT_REASON_CLOSE_CONNECTION:
+               if (cmd->logout_response == ISCSI_LOGOUT_CID_NOT_FOUND)
+                       break;
+               /*
+                * For CLOSECONNECTION logout requests carrying
+                * a matching logout CID -> local CID, the reference
+                * for the local CID will have been incremented in
+                * iscsi_logout_closeconnection().
+                *
+                * For CLOSECONNECTION logout requests carrying
+                * a different CID than the connection it arrived
+                * on, the connection responding to cmd->logout_cid
+                * is stopped in iscsit_logout_post_handler_diffcid().
+                */
+
+               pr_debug("iSCSI CID: %hu logout on CID: %hu"
+                       " successful.\n", cmd->logout_cid, conn->cid);
+               cmd->logout_response = ISCSI_LOGOUT_SUCCESS;
+               break;
+       case ISCSI_LOGOUT_REASON_RECOVERY:
+               if ((cmd->logout_response == ISCSI_LOGOUT_RECOVERY_UNSUPPORTED) ||
+                   (cmd->logout_response == ISCSI_LOGOUT_CLEANUP_FAILED))
+                       break;
+               /*
+                * If the connection is still active from our point of view
+                * force connection recovery to occur.
+                */
+               logout_conn = iscsit_get_conn_from_cid_rcfr(sess,
+                               cmd->logout_cid);
+               if ((logout_conn)) {
+                       iscsit_connection_reinstatement_rcfr(logout_conn);
+                       iscsit_dec_conn_usage_count(logout_conn);
+               }
+
+               cr = iscsit_get_inactive_connection_recovery_entry(
+                               conn->sess, cmd->logout_cid);
+               if (!cr) {
+                       pr_err("Unable to locate CID: %hu for"
+                       " REMOVECONNFORRECOVERY Logout Request.\n",
+                               cmd->logout_cid);
+                       cmd->logout_response = ISCSI_LOGOUT_CID_NOT_FOUND;
+                       break;
+               }
+
+               /* Drop recovery commands acknowledged by ExpStatSN. */
+               iscsit_discard_cr_cmds_by_expstatsn(cr, cmd->exp_stat_sn);
+
+               pr_debug("iSCSI REMOVECONNFORRECOVERY logout"
+                       " for recovery for CID: %hu on CID: %hu successful.\n",
+                               cmd->logout_cid, conn->cid);
+               cmd->logout_response = ISCSI_LOGOUT_SUCCESS;
+               break;
+       default:
+               pr_err("Unknown cmd->logout_reason: 0x%02x\n",
+                               cmd->logout_reason);
+               return -1;
+       }
+
+       tx_size = ISCSI_HDR_LEN;
+       hdr                     = (struct iscsi_logout_rsp *)cmd->pdu;
+       memset(hdr, 0, ISCSI_HDR_LEN);
+       hdr->opcode             = ISCSI_OP_LOGOUT_RSP;
+       hdr->flags              |= ISCSI_FLAG_CMD_FINAL;
+       hdr->response           = cmd->logout_response;
+       hdr->itt                = cpu_to_be32(cmd->init_task_tag);
+       cmd->stat_sn            = conn->stat_sn++;
+       hdr->statsn             = cpu_to_be32(cmd->stat_sn);
+
+       iscsit_increment_maxcmdsn(cmd, conn->sess);
+       hdr->exp_cmdsn          = cpu_to_be32(conn->sess->exp_cmd_sn);
+       hdr->max_cmdsn          = cpu_to_be32(conn->sess->max_cmd_sn);
+
+       iov = &cmd->iov_misc[0];
+       iov[niov].iov_base      = cmd->pdu;
+       iov[niov++].iov_len     = ISCSI_HDR_LEN;
+
+       if (conn->conn_ops->HeaderDigest) {
+               /* CRC32C digest is placed directly after the 48-byte header. */
+               u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+               iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+                               (unsigned char *)hdr, ISCSI_HDR_LEN,
+                               0, NULL, (u8 *)header_digest);
+
+               iov[0].iov_len += ISCSI_CRC_LEN;
+               tx_size += ISCSI_CRC_LEN;
+               pr_debug("Attaching CRC32C HeaderDigest to"
+                       " Logout Response 0x%08x\n", *header_digest);
+       }
+       cmd->iov_misc_count = niov;
+       cmd->tx_size = tx_size;
+
+       pr_debug("Sending Logout Response ITT: 0x%08x StatSN:"
+               " 0x%08x Response: 0x%02x CID: %hu on CID: %hu\n",
+               cmd->init_task_tag, cmd->stat_sn, hdr->response,
+               cmd->logout_cid, conn->cid);
+
+       return 0;
+}
+
+/*
+ *     Unsolicited NOPIN, either requesting a response or not.
+ */
+/*
+ * Build an unsolicited NopIN PDU (no ping data) and stage it in
+ * cmd->iov_misc.  StatSN is advertised but conn->stat_sn is NOT advanced,
+ * and an optional HeaderDigest is appended.  Returns 0.
+ *
+ * NOTE(review): want_response is not read in this function — presumably
+ * consumed by the caller; confirm at the call site.
+ */
+static int iscsit_send_unsolicited_nopin(
+       struct iscsi_cmd *cmd,
+       struct iscsi_conn *conn,
+       int want_response)
+{
+       int tx_size = ISCSI_HDR_LEN;
+       struct iscsi_nopin *hdr;
+
+       hdr                     = (struct iscsi_nopin *) cmd->pdu;
+       memset(hdr, 0, ISCSI_HDR_LEN);
+       hdr->opcode             = ISCSI_OP_NOOP_IN;
+       hdr->flags              |= ISCSI_FLAG_CMD_FINAL;
+       hdr->itt                = cpu_to_be32(cmd->init_task_tag);
+       hdr->ttt                = cpu_to_be32(cmd->targ_xfer_tag);
+       /* Advertise the current StatSN without consuming it. */
+       cmd->stat_sn            = conn->stat_sn;
+       hdr->statsn             = cpu_to_be32(cmd->stat_sn);
+       hdr->exp_cmdsn          = cpu_to_be32(conn->sess->exp_cmd_sn);
+       hdr->max_cmdsn          = cpu_to_be32(conn->sess->max_cmd_sn);
+
+       if (conn->conn_ops->HeaderDigest) {
+               u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+               iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+                               (unsigned char *)hdr, ISCSI_HDR_LEN,
+                               0, NULL, (u8 *)header_digest);
+
+               tx_size += ISCSI_CRC_LEN;
+               pr_debug("Attaching CRC32C HeaderDigest to"
+                       " NopIN 0x%08x\n", *header_digest);
+       }
+
+       cmd->iov_misc[0].iov_base       = cmd->pdu;
+       cmd->iov_misc[0].iov_len        = tx_size;
+       cmd->iov_misc_count     = 1;
+       cmd->tx_size            = tx_size;
+
+       pr_debug("Sending Unsolicited NOPIN TTT: 0x%08x StatSN:"
+               " 0x%08x CID: %hu\n", hdr->ttt, cmd->stat_sn, conn->cid);
+
+       return 0;
+}
+
+/*
+ * Build a NOPIN PDU answering an initiator NOPOUT ping and stage it in
+ * cmd->iov_misc for transmission by the TX thread.  Any ping data the
+ * initiator sent (cmd->buf_ptr, length cmd->buf_ptr_size) is echoed back
+ * as the NOPIN data segment.  Returns 0; cmd->iov_misc_count/tx_size
+ * describe what must be sent.
+ */
+static int iscsit_send_nopin_response(
+       struct iscsi_cmd *cmd,
+       struct iscsi_conn *conn)
+{
+       int niov = 0, tx_size;
+       u32 padding = 0;
+       struct kvec *iov;
+       struct iscsi_nopin *hdr;
+
+       tx_size = ISCSI_HDR_LEN;
+       hdr                     = (struct iscsi_nopin *) cmd->pdu;
+       memset(hdr, 0, ISCSI_HDR_LEN);
+       hdr->opcode             = ISCSI_OP_NOOP_IN;
+       hdr->flags              |= ISCSI_FLAG_CMD_FINAL;
+       /* DataSegmentLength == size of the echoed ping data */
+       hton24(hdr->dlength, cmd->buf_ptr_size);
+       /* LUN field carries the reserved all-ones value */
+       put_unaligned_le64(0xFFFFFFFFFFFFFFFFULL, &hdr->lun);
+       hdr->itt                = cpu_to_be32(cmd->init_task_tag);
+       hdr->ttt                = cpu_to_be32(cmd->targ_xfer_tag);
+       /* A solicited NOPIN response consumes a StatSN */
+       cmd->stat_sn            = conn->stat_sn++;
+       hdr->statsn             = cpu_to_be32(cmd->stat_sn);
+
+       iscsit_increment_maxcmdsn(cmd, conn->sess);
+       hdr->exp_cmdsn          = cpu_to_be32(conn->sess->exp_cmd_sn);
+       hdr->max_cmdsn          = cpu_to_be32(conn->sess->max_cmd_sn);
+
+       /* iov[0] always carries the 48-byte BHS */
+       iov = &cmd->iov_misc[0];
+       iov[niov].iov_base      = cmd->pdu;
+       iov[niov++].iov_len     = ISCSI_HDR_LEN;
+
+       if (conn->conn_ops->HeaderDigest) {
+               /* CRC32C of the BHS lands directly after the header in cmd->pdu */
+               u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+               iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+                               (unsigned char *)hdr, ISCSI_HDR_LEN,
+                               0, NULL, (u8 *)header_digest);
+
+               iov[0].iov_len += ISCSI_CRC_LEN;
+               tx_size += ISCSI_CRC_LEN;
+               pr_debug("Attaching CRC32C HeaderDigest"
+                       " to NopIn 0x%08x\n", *header_digest);
+       }
+
+       /*
+        * NOPOUT Ping Data is attached to struct iscsi_cmd->buf_ptr.
+        * NOPOUT DataSegmentLength is at struct iscsi_cmd->buf_ptr_size.
+        */
+       if (cmd->buf_ptr_size) {
+               iov[niov].iov_base      = cmd->buf_ptr;
+               iov[niov++].iov_len     = cmd->buf_ptr_size;
+               tx_size += cmd->buf_ptr_size;
+
+               pr_debug("Echoing back %u bytes of ping"
+                       " data.\n", cmd->buf_ptr_size);
+
+               /* Pad the data segment out to a 4-byte boundary */
+               padding = ((-cmd->buf_ptr_size) & 3);
+               if (padding != 0) {
+                       iov[niov].iov_base = &cmd->pad_bytes;
+                       iov[niov++].iov_len = padding;
+                       tx_size += padding;
+                       pr_debug("Attaching %u additional"
+                               " padding bytes.\n", padding);
+               }
+               if (conn->conn_ops->DataDigest) {
+                       /* DataDigest covers ping data plus padding */
+                       iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+                               cmd->buf_ptr, cmd->buf_ptr_size,
+                               padding, (u8 *)&cmd->pad_bytes,
+                               (u8 *)&cmd->data_crc);
+
+                       iov[niov].iov_base = &cmd->data_crc;
+                       iov[niov++].iov_len = ISCSI_CRC_LEN;
+                       tx_size += ISCSI_CRC_LEN;
+                       pr_debug("Attached DataDigest for %u"
+                               " bytes of ping data, CRC 0x%08x\n",
+                               cmd->buf_ptr_size, cmd->data_crc);
+               }
+       }
+
+       cmd->iov_misc_count = niov;
+       cmd->tx_size = tx_size;
+
+       pr_debug("Sending NOPIN Response ITT: 0x%08x, TTT:"
+               " 0x%08x, StatSN: 0x%08x, Length %u\n", cmd->init_task_tag,
+               cmd->targ_xfer_tag, cmd->stat_sn, cmd->buf_ptr_size);
+
+       return 0;
+}
+
+/*
+ * Build an R2T PDU for the next struct iscsi_r2t queued on *cmd and
+ * stage it in cmd->iov_misc.  Allocates a fresh target transfer tag for
+ * the R2T and marks it sent.  Returns 0 on success, or -1 when no
+ * unsent R2T is queued on the command.
+ */
+int iscsit_send_r2t(
+       struct iscsi_cmd *cmd,
+       struct iscsi_conn *conn)
+{
+       int tx_size = 0;
+       struct iscsi_r2t *r2t;
+       struct iscsi_r2t_rsp *hdr;
+
+       r2t = iscsit_get_r2t_from_list(cmd);
+       if (!r2t)
+               return -1;
+
+       hdr                     = (struct iscsi_r2t_rsp *) cmd->pdu;
+       memset(hdr, 0, ISCSI_HDR_LEN);
+       hdr->opcode             = ISCSI_OP_R2T;
+       hdr->flags              |= ISCSI_FLAG_CMD_FINAL;
+       int_to_scsilun(cmd->se_cmd.orig_fe_lun,
+                       (struct scsi_lun *)&hdr->lun);
+       hdr->itt                = cpu_to_be32(cmd->init_task_tag);
+       /*
+        * Allocate the next session-wide target transfer tag, skipping
+        * 0xFFFFFFFF which is the reserved TTT value.
+        */
+       spin_lock_bh(&conn->sess->ttt_lock);
+       r2t->targ_xfer_tag      = conn->sess->targ_xfer_tag++;
+       if (r2t->targ_xfer_tag == 0xFFFFFFFF)
+               r2t->targ_xfer_tag = conn->sess->targ_xfer_tag++;
+       spin_unlock_bh(&conn->sess->ttt_lock);
+       hdr->ttt                = cpu_to_be32(r2t->targ_xfer_tag);
+       /* R2Ts do not advance StatSN; echo the current value */
+       hdr->statsn             = cpu_to_be32(conn->stat_sn);
+       hdr->exp_cmdsn          = cpu_to_be32(conn->sess->exp_cmd_sn);
+       hdr->max_cmdsn          = cpu_to_be32(conn->sess->max_cmd_sn);
+       hdr->r2tsn              = cpu_to_be32(r2t->r2t_sn);
+       hdr->data_offset        = cpu_to_be32(r2t->offset);
+       hdr->data_length        = cpu_to_be32(r2t->xfer_len);
+
+       cmd->iov_misc[0].iov_base       = cmd->pdu;
+       cmd->iov_misc[0].iov_len        = ISCSI_HDR_LEN;
+       tx_size += ISCSI_HDR_LEN;
+
+       if (conn->conn_ops->HeaderDigest) {
+               /* CRC32C of the BHS lands directly after the header in cmd->pdu */
+               u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+               iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+                               (unsigned char *)hdr, ISCSI_HDR_LEN,
+                               0, NULL, (u8 *)header_digest);
+
+               cmd->iov_misc[0].iov_len += ISCSI_CRC_LEN;
+               tx_size += ISCSI_CRC_LEN;
+               pr_debug("Attaching CRC32 HeaderDigest for R2T"
+                       " PDU 0x%08x\n", *header_digest);
+       }
+
+       pr_debug("Built %sR2T, ITT: 0x%08x, TTT: 0x%08x, StatSN:"
+               " 0x%08x, R2TSN: 0x%08x, Offset: %u, DDTL: %u, CID: %hu\n",
+               (!r2t->recovery_r2t) ? "" : "Recovery ", cmd->init_task_tag,
+               r2t->targ_xfer_tag, ntohl(hdr->statsn), r2t->r2t_sn,
+                       r2t->offset, r2t->xfer_len, conn->cid);
+
+       cmd->iov_misc_count = 1;
+       cmd->tx_size = tx_size;
+
+       /* Mark this R2T as sent so iscsit_get_r2t_from_list() skips it */
+       spin_lock_bh(&cmd->r2t_lock);
+       r2t->sent_r2t = 1;
+       spin_unlock_bh(&cmd->r2t_lock);
+
+       return 0;
+}
+
+/*
+ *     Queue up R2T descriptors for a WRITE command until either
+ *     MaxOutstandingR2T is reached or the final R2T for the command has
+ *     been generated (ICF_SENT_LAST_R2T).  Returns 0 on success, -1 on
+ *     failure.  Runs with cmd->r2t_lock held across the build loop.
+ *
+ *     type 0: Normal Operation.
+ *     type 1: Called from Storage Transport.
+ *     type 2: Called from iscsi_task_reassign_complete_write() for
+ *             connection recovery.
+ */
+int iscsit_build_r2ts_for_cmd(
+       struct iscsi_cmd *cmd,
+       struct iscsi_conn *conn,
+       int type)
+{
+       int first_r2t = 1;
+       u32 offset = 0, xfer_len = 0;
+
+       spin_lock_bh(&cmd->r2t_lock);
+       /* Nothing to do once the last R2T has already been queued */
+       if (cmd->cmd_flags & ICF_SENT_LAST_R2T) {
+               spin_unlock_bh(&cmd->r2t_lock);
+               return 0;
+       }
+
+       /*
+        * Skip ahead past data already received (e.g. via immediate or
+        * unsolicited data-out) so R2Ts only solicit the remainder.
+        */
+       if (conn->sess->sess_ops->DataSequenceInOrder && (type != 2))
+               if (cmd->r2t_offset < cmd->write_data_done)
+                       cmd->r2t_offset = cmd->write_data_done;
+
+       while (cmd->outstanding_r2ts < conn->sess->sess_ops->MaxOutstandingR2T) {
+               if (conn->sess->sess_ops->DataSequenceInOrder) {
+                       offset = cmd->r2t_offset;
+
+                       /*
+                        * For the first R2T of a recovery rebuild (type 2)
+                        * the current burst may be partially complete, so
+                        * only request what is left of MaxBurstLength;
+                        * otherwise request a full burst (or whatever
+                        * remains of the command).
+                        */
+                       if (first_r2t && (type == 2)) {
+                               xfer_len = ((offset +
+                                            (conn->sess->sess_ops->MaxBurstLength -
+                                            cmd->next_burst_len) >
+                                            cmd->data_length) ?
+                                           (cmd->data_length - offset) :
+                                           (conn->sess->sess_ops->MaxBurstLength -
+                                            cmd->next_burst_len));
+                       } else {
+                               xfer_len = ((offset +
+                                            conn->sess->sess_ops->MaxBurstLength) >
+                                            cmd->data_length) ?
+                                            (cmd->data_length - offset) :
+                                            conn->sess->sess_ops->MaxBurstLength;
+                       }
+                       cmd->r2t_offset += xfer_len;
+
+                       if (cmd->r2t_offset == cmd->data_length)
+                               cmd->cmd_flags |= ICF_SENT_LAST_R2T;
+               } else {
+                       /* Out-of-order data: offsets come from the sequence map */
+                       struct iscsi_seq *seq;
+
+                       seq = iscsit_get_seq_holder_for_r2t(cmd);
+                       if (!seq) {
+                               spin_unlock_bh(&cmd->r2t_lock);
+                               return -1;
+                       }
+
+                       offset = seq->offset;
+                       xfer_len = seq->xfer_len;
+
+                       if (cmd->seq_send_order == cmd->seq_count)
+                               cmd->cmd_flags |= ICF_SENT_LAST_R2T;
+               }
+               cmd->outstanding_r2ts++;
+               first_r2t = 0;
+
+               if (iscsit_add_r2t_to_list(cmd, offset, xfer_len, 0, 0) < 0) {
+                       spin_unlock_bh(&cmd->r2t_lock);
+                       return -1;
+               }
+
+               if (cmd->cmd_flags & ICF_SENT_LAST_R2T)
+                       break;
+       }
+       spin_unlock_bh(&cmd->r2t_lock);
+
+       return 0;
+}
+
+/*
+ * Build a SCSI Response PDU for *cmd, attaching any SENSE data from the
+ * underlying se_cmd, and stage it in cmd->iov_misc for transmission.
+ * A StatSN is consumed only on the first (non-recovery) send; a
+ * retransmission (i_state != ISTATE_SEND_STATUS) reuses cmd->stat_sn.
+ * Returns 0.
+ */
+static int iscsit_send_status(
+       struct iscsi_cmd *cmd,
+       struct iscsi_conn *conn)
+{
+       u8 iov_count = 0, recovery;
+       u32 padding = 0, tx_size = 0;
+       struct iscsi_scsi_rsp *hdr;
+       struct kvec *iov;
+
+       recovery = (cmd->i_state != ISTATE_SEND_STATUS);
+       if (!recovery)
+               cmd->stat_sn = conn->stat_sn++;
+
+       spin_lock_bh(&conn->sess->session_stats_lock);
+       conn->sess->rsp_pdus++;
+       spin_unlock_bh(&conn->sess->session_stats_lock);
+
+       hdr                     = (struct iscsi_scsi_rsp *) cmd->pdu;
+       memset(hdr, 0, ISCSI_HDR_LEN);
+       hdr->opcode             = ISCSI_OP_SCSI_CMD_RSP;
+       hdr->flags              |= ISCSI_FLAG_CMD_FINAL;
+       /* Report residual over/underflow from the backend, if any */
+       if (cmd->se_cmd.se_cmd_flags & SCF_OVERFLOW_BIT) {
+               hdr->flags |= ISCSI_FLAG_CMD_OVERFLOW;
+               hdr->residual_count = cpu_to_be32(cmd->residual_count);
+       } else if (cmd->se_cmd.se_cmd_flags & SCF_UNDERFLOW_BIT) {
+               hdr->flags |= ISCSI_FLAG_CMD_UNDERFLOW;
+               hdr->residual_count = cpu_to_be32(cmd->residual_count);
+       }
+       hdr->response           = cmd->iscsi_response;
+       hdr->cmd_status         = cmd->se_cmd.scsi_status;
+       hdr->itt                = cpu_to_be32(cmd->init_task_tag);
+       hdr->statsn             = cpu_to_be32(cmd->stat_sn);
+
+       iscsit_increment_maxcmdsn(cmd, conn->sess);
+       hdr->exp_cmdsn          = cpu_to_be32(conn->sess->exp_cmd_sn);
+       hdr->max_cmdsn          = cpu_to_be32(conn->sess->max_cmd_sn);
+
+       iov = &cmd->iov_misc[0];
+       iov[iov_count].iov_base = cmd->pdu;
+       iov[iov_count++].iov_len = ISCSI_HDR_LEN;
+       tx_size += ISCSI_HDR_LEN;
+
+       /*
+        * Attach SENSE DATA payload to iSCSI Response PDU
+        */
+       if (cmd->se_cmd.sense_buffer &&
+          ((cmd->se_cmd.se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) ||
+           (cmd->se_cmd.se_cmd_flags & SCF_EMULATED_TASK_SENSE))) {
+               /* Pad the sense data segment out to a 4-byte boundary */
+               padding         = -(cmd->se_cmd.scsi_sense_length) & 3;
+               hton24(hdr->dlength, cmd->se_cmd.scsi_sense_length);
+               iov[iov_count].iov_base = cmd->se_cmd.sense_buffer;
+               iov[iov_count++].iov_len =
+                               (cmd->se_cmd.scsi_sense_length + padding);
+               tx_size += cmd->se_cmd.scsi_sense_length;
+
+               if (padding) {
+                       /* Pad bytes are zeroed in place at the tail of the buffer */
+                       memset(cmd->se_cmd.sense_buffer +
+                               cmd->se_cmd.scsi_sense_length, 0, padding);
+                       tx_size += padding;
+                       pr_debug("Adding %u bytes of padding to"
+                               " SENSE.\n", padding);
+               }
+
+               if (conn->conn_ops->DataDigest) {
+                       /* DataDigest covers sense data plus padding */
+                       iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+                               cmd->se_cmd.sense_buffer,
+                               (cmd->se_cmd.scsi_sense_length + padding),
+                               0, NULL, (u8 *)&cmd->data_crc);
+
+                       iov[iov_count].iov_base    = &cmd->data_crc;
+                       iov[iov_count++].iov_len     = ISCSI_CRC_LEN;
+                       tx_size += ISCSI_CRC_LEN;
+
+                       pr_debug("Attaching CRC32 DataDigest for"
+                               " SENSE, %u bytes CRC 0x%08x\n",
+                               (cmd->se_cmd.scsi_sense_length + padding),
+                               cmd->data_crc);
+               }
+
+               pr_debug("Attaching SENSE DATA: %u bytes to iSCSI"
+                               " Response PDU\n",
+                               cmd->se_cmd.scsi_sense_length);
+       }
+
+       if (conn->conn_ops->HeaderDigest) {
+               /* CRC32C of the BHS lands directly after the header in cmd->pdu */
+               u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+               iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+                               (unsigned char *)hdr, ISCSI_HDR_LEN,
+                               0, NULL, (u8 *)header_digest);
+
+               iov[0].iov_len += ISCSI_CRC_LEN;
+               tx_size += ISCSI_CRC_LEN;
+               pr_debug("Attaching CRC32 HeaderDigest for Response"
+                               " PDU 0x%08x\n", *header_digest);
+       }
+
+       cmd->iov_misc_count = iov_count;
+       cmd->tx_size = tx_size;
+
+       /*
+        * Print the actual iSCSI response code; the original code passed
+        * a hard-coded 0x00 here, which misreported any non-zero
+        * hdr->response in the debug output.
+        */
+       pr_debug("Built %sSCSI Response, ITT: 0x%08x, StatSN: 0x%08x,"
+               " Response: 0x%02x, SAM Status: 0x%02x, CID: %hu\n",
+               (!recovery) ? "" : "Recovery ", cmd->init_task_tag,
+               cmd->stat_sn, hdr->response, cmd->se_cmd.scsi_status, conn->cid);
+
+       return 0;
+}
+
+/*
+ * Map a TCM task-management response code onto the iSCSI TMF response
+ * value carried in the Task Management Response PDU.  Anything not
+ * recognized (including TMR_FUNCTION_REJECTED) maps to
+ * ISCSI_TMF_RSP_REJECTED.
+ */
+static u8 iscsit_convert_tcm_tmr_rsp(struct se_tmr_req *se_tmr)
+{
+       int tcm_rsp = se_tmr->response;
+
+       if (tcm_rsp == TMR_FUNCTION_COMPLETE)
+               return ISCSI_TMF_RSP_COMPLETE;
+       if (tcm_rsp == TMR_TASK_DOES_NOT_EXIST)
+               return ISCSI_TMF_RSP_NO_TASK;
+       if (tcm_rsp == TMR_LUN_DOES_NOT_EXIST)
+               return ISCSI_TMF_RSP_NO_LUN;
+       if (tcm_rsp == TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED)
+               return ISCSI_TMF_RSP_NOT_SUPPORTED;
+       if (tcm_rsp == TMR_FUNCTION_AUTHORIZATION_FAILED)
+               return ISCSI_TMF_RSP_AUTH_FAILED;
+
+       /* TMR_FUNCTION_REJECTED and everything else */
+       return ISCSI_TMF_RSP_REJECTED;
+}
+
+/*
+ * Build a Task Management Function Response PDU for *cmd, translating
+ * the TCM-level TMR result into the iSCSI TMF response code, and stage
+ * it in cmd->iov_misc for transmission.  Consumes a StatSN.  Returns 0.
+ */
+static int iscsit_send_task_mgt_rsp(
+       struct iscsi_cmd *cmd,
+       struct iscsi_conn *conn)
+{
+       struct se_tmr_req *se_tmr = cmd->se_cmd.se_tmr_req;
+       struct iscsi_tm_rsp *hdr;
+       u32 tx_size = 0;
+
+       hdr                     = (struct iscsi_tm_rsp *) cmd->pdu;
+       memset(hdr, 0, ISCSI_HDR_LEN);
+       hdr->opcode             = ISCSI_OP_SCSI_TMFUNC_RSP;
+       hdr->response           = iscsit_convert_tcm_tmr_rsp(se_tmr);
+       hdr->itt                = cpu_to_be32(cmd->init_task_tag);
+       cmd->stat_sn            = conn->stat_sn++;
+       hdr->statsn             = cpu_to_be32(cmd->stat_sn);
+
+       iscsit_increment_maxcmdsn(cmd, conn->sess);
+       hdr->exp_cmdsn          = cpu_to_be32(conn->sess->exp_cmd_sn);
+       hdr->max_cmdsn          = cpu_to_be32(conn->sess->max_cmd_sn);
+
+       cmd->iov_misc[0].iov_base       = cmd->pdu;
+       cmd->iov_misc[0].iov_len        = ISCSI_HDR_LEN;
+       tx_size += ISCSI_HDR_LEN;
+
+       if (conn->conn_ops->HeaderDigest) {
+               /* CRC32C of the BHS lands directly after the header in cmd->pdu */
+               u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+               iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+                               (unsigned char *)hdr, ISCSI_HDR_LEN,
+                               0, NULL, (u8 *)header_digest);
+
+               cmd->iov_misc[0].iov_len += ISCSI_CRC_LEN;
+               tx_size += ISCSI_CRC_LEN;
+               pr_debug("Attaching CRC32 HeaderDigest for Task"
+                       " Mgmt Response PDU 0x%08x\n", *header_digest);
+       }
+
+       cmd->iov_misc_count = 1;
+       cmd->tx_size = tx_size;
+
+       pr_debug("Built Task Management Response ITT: 0x%08x,"
+               " StatSN: 0x%08x, Response: 0x%02x, CID: %hu\n",
+               cmd->init_task_tag, cmd->stat_sn, hdr->response, conn->cid);
+
+       return 0;
+}
+
+/*
+ * Build the SendTargets text response payload: for every registered
+ * target (tiqn) emit a "TargetName=..." key, and for each active TPG
+ * one "TargetAddress=ip:port,tpgt" key per network portal.  The payload
+ * is allocated here and handed off via cmd->buf_ptr; output is capped
+ * at the connection's MaxRecvDataSegmentLength (bounded to 32768), and
+ * anything past the cap is silently dropped.  Returns the payload
+ * length on success, or -ENOMEM.
+ *
+ * Lock ordering: tiqn_lock -> tiqn->tiqn_tpg_lock -> tpg->tpg_np_lock.
+ *
+ * Fix vs. the original: the first overflow check ran before
+ * tiqn->tiqn_tpg_lock was ever acquired, yet called
+ * spin_unlock(&tiqn->tiqn_tpg_lock) — unlocking an unheld spinlock.
+ * That bogus unlock is removed; the later overflow check, which runs
+ * with both inner locks held, still drops them before bailing out.
+ */
+static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd)
+{
+       char *payload = NULL;
+       struct iscsi_conn *conn = cmd->conn;
+       struct iscsi_portal_group *tpg;
+       struct iscsi_tiqn *tiqn;
+       struct iscsi_tpg_np *tpg_np;
+       int buffer_len, end_of_buf = 0, len = 0, payload_len = 0;
+       unsigned char buf[256];
+
+       buffer_len = (conn->conn_ops->MaxRecvDataSegmentLength > 32768) ?
+                       32768 : conn->conn_ops->MaxRecvDataSegmentLength;
+
+       memset(buf, 0, 256);
+
+       payload = kzalloc(buffer_len, GFP_KERNEL);
+       if (!payload) {
+               pr_err("Unable to allocate memory for sendtargets"
+                               " response.\n");
+               return -ENOMEM;
+       }
+
+       spin_lock(&tiqn_lock);
+       list_for_each_entry(tiqn, &g_tiqn_list, tiqn_list) {
+               /* +1 so the key's NUL terminator is copied as separator */
+               len = sprintf(buf, "TargetName=%s", tiqn->tiqn);
+               len += 1;
+
+               if ((len + payload_len) > buffer_len) {
+                       /* Only tiqn_lock is held here; nothing inner to drop */
+                       end_of_buf = 1;
+                       goto eob;
+               }
+               memcpy((void *)payload + payload_len, buf, len);
+               payload_len += len;
+
+               spin_lock(&tiqn->tiqn_tpg_lock);
+               list_for_each_entry(tpg, &tiqn->tiqn_tpg_list, tpg_list) {
+
+                       /* Skip TPGs that are not in an exportable state */
+                       spin_lock(&tpg->tpg_state_lock);
+                       if ((tpg->tpg_state == TPG_STATE_FREE) ||
+                           (tpg->tpg_state == TPG_STATE_INACTIVE)) {
+                               spin_unlock(&tpg->tpg_state_lock);
+                               continue;
+                       }
+                       spin_unlock(&tpg->tpg_state_lock);
+
+                       spin_lock(&tpg->tpg_np_lock);
+                       list_for_each_entry(tpg_np, &tpg->tpg_gnp_list,
+                                               tpg_np_list) {
+                               /* IPv6 literals are bracketed per RFC 3720 */
+                               len = sprintf(buf, "TargetAddress="
+                                       "%s%s%s:%hu,%hu",
+                                       (tpg_np->tpg_np->np_sockaddr.ss_family == AF_INET6) ?
+                                       "[" : "", tpg_np->tpg_np->np_ip,
+                                       (tpg_np->tpg_np->np_sockaddr.ss_family == AF_INET6) ?
+                                       "]" : "", tpg_np->tpg_np->np_port,
+                                       tpg->tpgt);
+                               len += 1;
+
+                               if ((len + payload_len) > buffer_len) {
+                                       spin_unlock(&tpg->tpg_np_lock);
+                                       spin_unlock(&tiqn->tiqn_tpg_lock);
+                                       end_of_buf = 1;
+                                       goto eob;
+                               }
+                               memcpy((void *)payload + payload_len, buf, len);
+                               payload_len += len;
+                       }
+                       spin_unlock(&tpg->tpg_np_lock);
+               }
+               spin_unlock(&tiqn->tiqn_tpg_lock);
+eob:
+               if (end_of_buf)
+                       break;
+       }
+       spin_unlock(&tiqn_lock);
+
+       /* Ownership of the payload buffer transfers to cmd->buf_ptr */
+       cmd->buf_ptr = payload;
+
+       return payload_len;
+}
+
+/*
+ *     Build a Text Response PDU carrying the SendTargets payload
+ *     generated by iscsit_build_sendtargets_response(), and stage it in
+ *     cmd->iov_misc.  Consumes a StatSN.  Returns 0, or a negative
+ *     errno propagated from payload generation.
+ *
+ *     FIXME: Add support for F_BIT and C_BIT when the length is longer than
+ *     MaxRecvDataSegmentLength.
+ */
+static int iscsit_send_text_rsp(
+       struct iscsi_cmd *cmd,
+       struct iscsi_conn *conn)
+{
+       struct iscsi_text_rsp *hdr;
+       struct kvec *iov;
+       u32 padding = 0, tx_size = 0;
+       int text_length, iov_count = 0;
+
+       text_length = iscsit_build_sendtargets_response(cmd);
+       if (text_length < 0)
+               return text_length;
+
+       /* Pad the text data segment out to a 4-byte boundary */
+       padding = ((-text_length) & 3);
+       if (padding != 0) {
+               memset(cmd->buf_ptr + text_length, 0, padding);
+               pr_debug("Attaching %u additional bytes for"
+                       " padding.\n", padding);
+       }
+
+       hdr                     = (struct iscsi_text_rsp *) cmd->pdu;
+       memset(hdr, 0, ISCSI_HDR_LEN);
+       hdr->opcode             = ISCSI_OP_TEXT_RSP;
+       hdr->flags              |= ISCSI_FLAG_CMD_FINAL;
+       hton24(hdr->dlength, text_length);
+       hdr->itt                = cpu_to_be32(cmd->init_task_tag);
+       hdr->ttt                = cpu_to_be32(cmd->targ_xfer_tag);
+       cmd->stat_sn            = conn->stat_sn++;
+       hdr->statsn             = cpu_to_be32(cmd->stat_sn);
+
+       iscsit_increment_maxcmdsn(cmd, conn->sess);
+       hdr->exp_cmdsn          = cpu_to_be32(conn->sess->exp_cmd_sn);
+       hdr->max_cmdsn          = cpu_to_be32(conn->sess->max_cmd_sn);
+
+       iov = &cmd->iov_misc[0];
+
+       /* iov[0]: BHS; iov[1]: text payload plus padding */
+       iov[iov_count].iov_base = cmd->pdu;
+       iov[iov_count++].iov_len = ISCSI_HDR_LEN;
+       iov[iov_count].iov_base = cmd->buf_ptr;
+       iov[iov_count++].iov_len = text_length + padding;
+
+       tx_size += (ISCSI_HDR_LEN + text_length + padding);
+
+       if (conn->conn_ops->HeaderDigest) {
+               /* CRC32C of the BHS lands directly after the header in cmd->pdu */
+               u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+               iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+                               (unsigned char *)hdr, ISCSI_HDR_LEN,
+                               0, NULL, (u8 *)header_digest);
+
+               iov[0].iov_len += ISCSI_CRC_LEN;
+               tx_size += ISCSI_CRC_LEN;
+               pr_debug("Attaching CRC32 HeaderDigest for"
+                       " Text Response PDU 0x%08x\n", *header_digest);
+       }
+
+       if (conn->conn_ops->DataDigest) {
+               /* DataDigest covers the text payload plus padding */
+               iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+                               cmd->buf_ptr, (text_length + padding),
+                               0, NULL, (u8 *)&cmd->data_crc);
+
+               iov[iov_count].iov_base = &cmd->data_crc;
+               iov[iov_count++].iov_len = ISCSI_CRC_LEN;
+               tx_size += ISCSI_CRC_LEN;
+
+               pr_debug("Attaching DataDigest for %u bytes of text"
+                       " data, CRC 0x%08x\n", (text_length + padding),
+                       cmd->data_crc);
+       }
+
+       cmd->iov_misc_count = iov_count;
+       cmd->tx_size = tx_size;
+
+       pr_debug("Built Text Response: ITT: 0x%08x, StatSN: 0x%08x,"
+               " Length: %u, CID: %hu\n", cmd->init_task_tag, cmd->stat_sn,
+                       text_length, conn->cid);
+       return 0;
+}
+
+/*
+ * Build a Reject PDU.  The 48-byte header of the offending PDU being
+ * rejected is carried as the data segment (cmd->buf_ptr, dlength ==
+ * ISCSI_HDR_LEN).  Consumes a StatSN.  Returns 0.
+ *
+ * NOTE(review): unlike the other send paths, hdr is deliberately NOT
+ * memset here — hdr->reason is read for the pr_debug below but never
+ * written in this function, so it is presumably filled in when the
+ * reject was queued (iscsit_add_reject path) into cmd->pdu; verify
+ * against the caller before changing this.
+ */
+static int iscsit_send_reject(
+       struct iscsi_cmd *cmd,
+       struct iscsi_conn *conn)
+{
+       u32 iov_count = 0, tx_size = 0;
+       struct iscsi_reject *hdr;
+       struct kvec *iov;
+
+       hdr                     = (struct iscsi_reject *) cmd->pdu;
+       hdr->opcode             = ISCSI_OP_REJECT;
+       hdr->flags              |= ISCSI_FLAG_CMD_FINAL;
+       /* Data segment is the rejected PDU's header, hence ISCSI_HDR_LEN */
+       hton24(hdr->dlength, ISCSI_HDR_LEN);
+       cmd->stat_sn            = conn->stat_sn++;
+       hdr->statsn             = cpu_to_be32(cmd->stat_sn);
+       hdr->exp_cmdsn  = cpu_to_be32(conn->sess->exp_cmd_sn);
+       hdr->max_cmdsn  = cpu_to_be32(conn->sess->max_cmd_sn);
+
+       iov = &cmd->iov_misc[0];
+
+       /* iov[0]: Reject BHS; iov[1]: echoed header of the bad PDU */
+       iov[iov_count].iov_base = cmd->pdu;
+       iov[iov_count++].iov_len = ISCSI_HDR_LEN;
+       iov[iov_count].iov_base = cmd->buf_ptr;
+       iov[iov_count++].iov_len = ISCSI_HDR_LEN;
+
+       tx_size = (ISCSI_HDR_LEN + ISCSI_HDR_LEN);
+
+       if (conn->conn_ops->HeaderDigest) {
+               /* CRC32C of the BHS lands directly after the header in cmd->pdu */
+               u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+               iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+                               (unsigned char *)hdr, ISCSI_HDR_LEN,
+                               0, NULL, (u8 *)header_digest);
+
+               iov[0].iov_len += ISCSI_CRC_LEN;
+               tx_size += ISCSI_CRC_LEN;
+               pr_debug("Attaching CRC32 HeaderDigest for"
+                       " REJECT PDU 0x%08x\n", *header_digest);
+       }
+
+       if (conn->conn_ops->DataDigest) {
+               /* DataDigest covers the echoed 48-byte header */
+               iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+                               (unsigned char *)cmd->buf_ptr, ISCSI_HDR_LEN,
+                               0, NULL, (u8 *)&cmd->data_crc);
+
+               iov[iov_count].iov_base = &cmd->data_crc;
+               iov[iov_count++].iov_len  = ISCSI_CRC_LEN;
+               tx_size += ISCSI_CRC_LEN;
+               pr_debug("Attaching CRC32 DataDigest for REJECT"
+                               " PDU 0x%08x\n", cmd->data_crc);
+       }
+
+       cmd->iov_misc_count = iov_count;
+       cmd->tx_size = tx_size;
+
+       pr_debug("Built Reject PDU StatSN: 0x%08x, Reason: 0x%02x,"
+               " CID: %hu\n", ntohl(hdr->statsn), hdr->reason, conn->cid);
+
+       return 0;
+}
+
+/*
+ * If the TCP connection has been shut down in either direction, give
+ * the other half of the connection up to ISCSI_TX_THREAD_TCP_TIMEOUT
+ * seconds to signal tx_half_close_comp before the TX thread proceeds
+ * with teardown.
+ */
+static void iscsit_tx_thread_wait_for_tcp(struct iscsi_conn *conn)
+{
+       int shutdown = conn->sock->sk->sk_shutdown;
+
+       if (!(shutdown & (SEND_SHUTDOWN | RCV_SHUTDOWN)))
+               return;
+
+       wait_for_completion_interruptible_timeout(
+                               &conn->tx_half_close_comp,
+                               ISCSI_TX_THREAD_TCP_TIMEOUT * HZ);
+}
+
+#ifdef CONFIG_SMP
+
+/*
+ * Choose a single online CPU for this connection's thread_set and
+ * record it in conn->conn_cpumask, so the connection's TX and RX
+ * kthreads can later be pinned to the same CPU.
+ */
+void iscsit_thread_get_cpumask(struct iscsi_conn *conn)
+{
+       struct iscsi_thread_set *ts = conn->thread_set;
+       int idx, cpu;
+
+       /*
+        * ts->thread_id is assigned from iscsit_global->ts_bitmap within
+        * iscsi_thread_set.c:iscsi_allocate_thread_sets().  Fold it over
+        * the count of online CPUs to pick this connection's CPU.
+        */
+       idx = ts->thread_id % cpumask_weight(cpu_online_mask);
+
+       for_each_online_cpu(cpu) {
+               if (idx-- == 0) {
+                       cpumask_set_cpu(cpu, conn->conn_cpumask);
+                       return;
+               }
+       }
+       /*
+        * Unreachable in practice: idx starts below the online CPU count,
+        * so the loop above always selects a CPU.  Fall back to all CPUs.
+        */
+       dump_stack();
+       cpumask_setall(conn->conn_cpumask);
+}
+
+/*
+ * Re-apply conn->conn_cpumask to kthread *p if a reset has been
+ * requested for its direction, so the TX and RX kthreads of a
+ * connection end up scheduled on the same CPU.  A no-op when the
+ * direction's reset flag is clear.
+ */
+static inline void iscsit_thread_check_cpumask(
+       struct iscsi_conn *conn,
+       struct task_struct *p,
+       int mode)
+{
+       char buf[128];
+       /*
+        * mode == 1 signals iscsi_target_tx_thread() usage.
+        * mode == 0 signals iscsi_target_rx_thread() usage.
+        */
+       if (mode == 1) {
+               if (!conn->conn_tx_reset_cpumask)
+                       return;
+               conn->conn_tx_reset_cpumask = 0;
+       } else {
+               if (!conn->conn_rx_reset_cpumask)
+                       return;
+               conn->conn_rx_reset_cpumask = 0;
+       }
+       /*
+        * Update the CPU mask for this single kthread so that
+        * both TX and RX kthreads are scheduled to run on the
+        * same CPU.
+        */
+       /* buf is consumed only by the disabled debug print below */
+       memset(buf, 0, 128);
+       cpumask_scnprintf(buf, 128, conn->conn_cpumask);
+#if 0
+       pr_debug(">>>>>>>>>>>>>> Calling set_cpus_allowed_ptr():"
+                       " %s for %s\n", buf, p->comm);
+#endif
+       set_cpus_allowed_ptr(p, conn->conn_cpumask);
+}
+
+#else
+#define iscsit_thread_get_cpumask(X) ({})
+#define iscsit_thread_check_cpumask(X, Y, Z) ({})
+#endif /* CONFIG_SMP */
+
+int iscsi_target_tx_thread(void *arg)
+{
+       u8 state;
+       int eodr = 0;
+       int ret = 0;
+       int sent_status = 0;
+       int use_misc = 0;
+       int map_sg = 0;
+       struct iscsi_cmd *cmd = NULL;
+       struct iscsi_conn *conn;
+       struct iscsi_queue_req *qr = NULL;
+       struct se_cmd *se_cmd;
+       struct iscsi_thread_set *ts = (struct iscsi_thread_set *)arg;
+       /*
+        * Allow ourselves to be interrupted by SIGINT so that a
+        * connection recovery / failure event can be triggered externally.
+        */
+       allow_signal(SIGINT);
+
+restart:
+       conn = iscsi_tx_thread_pre_handler(ts);
+       if (!conn)
+               goto out;
+
+       eodr = map_sg = ret = sent_status = use_misc = 0;
+
+       while (!kthread_should_stop()) {
+               /*
+                * Ensure that both TX and RX per connection kthreads
+                * are scheduled to run on the same CPU.
+                */
+               iscsit_thread_check_cpumask(conn, current, 1);
+
+               schedule_timeout_interruptible(MAX_SCHEDULE_TIMEOUT);
+
+               if ((ts->status == ISCSI_THREAD_SET_RESET) ||
+                    signal_pending(current))
+                       goto transport_err;
+
+get_immediate:
+               qr = iscsit_get_cmd_from_immediate_queue(conn);
+               if (qr) {
+                       atomic_set(&conn->check_immediate_queue, 0);
+                       cmd = qr->cmd;
+                       state = qr->state;
+                       kmem_cache_free(lio_qr_cache, qr);
+
+                       spin_lock_bh(&cmd->istate_lock);
+                       switch (state) {
+                       case ISTATE_SEND_R2T:
+                               spin_unlock_bh(&cmd->istate_lock);
+                               ret = iscsit_send_r2t(cmd, conn);
+                               break;
+                       case ISTATE_REMOVE:
+                               spin_unlock_bh(&cmd->istate_lock);
+
+                               if (cmd->data_direction == DMA_TO_DEVICE)
+                                       iscsit_stop_dataout_timer(cmd);
+
+                               spin_lock_bh(&conn->cmd_lock);
+                               list_del(&cmd->i_list);
+                               spin_unlock_bh(&conn->cmd_lock);
+                               /*
+                                * Determine if a struct se_cmd is assoicated with
+                                * this struct iscsi_cmd.
+                                */
+                               if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) &&
+                                   !(cmd->tmr_req))
+                                       iscsit_release_cmd(cmd);
+                               else
+                                       transport_generic_free_cmd(&cmd->se_cmd,
+                                                               1, 0);
+                               goto get_immediate;
+                       case ISTATE_SEND_NOPIN_WANT_RESPONSE:
+                               spin_unlock_bh(&cmd->istate_lock);
+                               iscsit_mod_nopin_response_timer(conn);
+                               ret = iscsit_send_unsolicited_nopin(cmd,
+                                               conn, 1);
+                               break;
+                       case ISTATE_SEND_NOPIN_NO_RESPONSE:
+                               spin_unlock_bh(&cmd->istate_lock);
+                               ret = iscsit_send_unsolicited_nopin(cmd,
+                                               conn, 0);
+                               break;
+                       default:
+                               pr_err("Unknown Opcode: 0x%02x ITT:"
+                               " 0x%08x, i_state: %d on CID: %hu\n",
+                               cmd->iscsi_opcode, cmd->init_task_tag, state,
+                               conn->cid);
+                               spin_unlock_bh(&cmd->istate_lock);
+                               goto transport_err;
+                       }
+                       if (ret < 0) {
+                               conn->tx_immediate_queue = 0;
+                               goto transport_err;
+                       }
+
+                       if (iscsit_send_tx_data(cmd, conn, 1) < 0) {
+                               conn->tx_immediate_queue = 0;
+                               iscsit_tx_thread_wait_for_tcp(conn);
+                               goto transport_err;
+                       }
+
+                       spin_lock_bh(&cmd->istate_lock);
+                       switch (state) {
+                       case ISTATE_SEND_R2T:
+                               spin_unlock_bh(&cmd->istate_lock);
+                               spin_lock_bh(&cmd->dataout_timeout_lock);
+                               iscsit_start_dataout_timer(cmd, conn);
+                               spin_unlock_bh(&cmd->dataout_timeout_lock);
+                               break;
+                       case ISTATE_SEND_NOPIN_WANT_RESPONSE:
+                               cmd->i_state = ISTATE_SENT_NOPIN_WANT_RESPONSE;
+                               spin_unlock_bh(&cmd->istate_lock);
+                               break;
+                       case ISTATE_SEND_NOPIN_NO_RESPONSE:
+                               cmd->i_state = ISTATE_SENT_STATUS;
+                               spin_unlock_bh(&cmd->istate_lock);
+                               break;
+                       default:
+                               pr_err("Unknown Opcode: 0x%02x ITT:"
+                                       " 0x%08x, i_state: %d on CID: %hu\n",
+                                       cmd->iscsi_opcode, cmd->init_task_tag,
+                                       state, conn->cid);
+                               spin_unlock_bh(&cmd->istate_lock);
+                               goto transport_err;
+                       }
+                       goto get_immediate;
+               } else
+                       conn->tx_immediate_queue = 0;
+
+get_response:
+               qr = iscsit_get_cmd_from_response_queue(conn);
+               if (qr) {
+                       cmd = qr->cmd;
+                       state = qr->state;
+                       kmem_cache_free(lio_qr_cache, qr);
+
+                       spin_lock_bh(&cmd->istate_lock);
+check_rsp_state:
+                       switch (state) {
+                       case ISTATE_SEND_DATAIN:
+                               spin_unlock_bh(&cmd->istate_lock);
+                               ret = iscsit_send_data_in(cmd, conn,
+                                                         &eodr);
+                               map_sg = 1;
+                               break;
+                       case ISTATE_SEND_STATUS:
+                       case ISTATE_SEND_STATUS_RECOVERY:
+                               spin_unlock_bh(&cmd->istate_lock);
+                               use_misc = 1;
+                               ret = iscsit_send_status(cmd, conn);
+                               break;
+                       case ISTATE_SEND_LOGOUTRSP:
+                               spin_unlock_bh(&cmd->istate_lock);
+                               use_misc = 1;
+                               ret = iscsit_send_logout_response(cmd, conn);
+                               break;
+                       case ISTATE_SEND_ASYNCMSG:
+                               spin_unlock_bh(&cmd->istate_lock);
+                               use_misc = 1;
+                               ret = iscsit_send_conn_drop_async_message(
+                                               cmd, conn);
+                               break;
+                       case ISTATE_SEND_NOPIN:
+                               spin_unlock_bh(&cmd->istate_lock);
+                               use_misc = 1;
+                               ret = iscsit_send_nopin_response(cmd, conn);
+                               break;
+                       case ISTATE_SEND_REJECT:
+                               spin_unlock_bh(&cmd->istate_lock);
+                               use_misc = 1;
+                               ret = iscsit_send_reject(cmd, conn);
+                               break;
+                       case ISTATE_SEND_TASKMGTRSP:
+                               spin_unlock_bh(&cmd->istate_lock);
+                               use_misc = 1;
+                               ret = iscsit_send_task_mgt_rsp(cmd, conn);
+                               if (ret != 0)
+                                       break;
+                               ret = iscsit_tmr_post_handler(cmd, conn);
+                               if (ret != 0)
+                                       iscsit_fall_back_to_erl0(conn->sess);
+                               break;
+                       case ISTATE_SEND_TEXTRSP:
+                               spin_unlock_bh(&cmd->istate_lock);
+                               use_misc = 1;
+                               ret = iscsit_send_text_rsp(cmd, conn);
+                               break;
+                       default:
+                               pr_err("Unknown Opcode: 0x%02x ITT:"
+                                       " 0x%08x, i_state: %d on CID: %hu\n",
+                                       cmd->iscsi_opcode, cmd->init_task_tag,
+                                       state, conn->cid);
+                               spin_unlock_bh(&cmd->istate_lock);
+                               goto transport_err;
+                       }
+                       if (ret < 0) {
+                               conn->tx_response_queue = 0;
+                               goto transport_err;
+                       }
+
+                       se_cmd = &cmd->se_cmd;
+
+                       if (map_sg && !conn->conn_ops->IFMarker) {
+                               if (iscsit_fe_sendpage_sg(cmd, conn) < 0) {
+                                       conn->tx_response_queue = 0;
+                                       iscsit_tx_thread_wait_for_tcp(conn);
+                                       iscsit_unmap_iovec(cmd);
+                                       goto transport_err;
+                               }
+                       } else {
+                               if (iscsit_send_tx_data(cmd, conn, use_misc) < 0) {
+                                       conn->tx_response_queue = 0;
+                                       iscsit_tx_thread_wait_for_tcp(conn);
+                                       iscsit_unmap_iovec(cmd);
+                                       goto transport_err;
+                               }
+                       }
+                       map_sg = 0;
+                       iscsit_unmap_iovec(cmd);
+
+                       spin_lock_bh(&cmd->istate_lock);
+                       switch (state) {
+                       case ISTATE_SEND_DATAIN:
+                               if (!eodr)
+                                       goto check_rsp_state;
+
+                               if (eodr == 1) {
+                                       cmd->i_state = ISTATE_SENT_LAST_DATAIN;
+                                       sent_status = 1;
+                                       eodr = use_misc = 0;
+                               } else if (eodr == 2) {
+                                       cmd->i_state = state =
+                                                       ISTATE_SEND_STATUS;
+                                       sent_status = 0;
+                                       eodr = use_misc = 0;
+                                       goto check_rsp_state;
+                               }
+                               break;
+                       case ISTATE_SEND_STATUS:
+                               use_misc = 0;
+                               sent_status = 1;
+                               break;
+                       case ISTATE_SEND_ASYNCMSG:
+                       case ISTATE_SEND_NOPIN:
+                       case ISTATE_SEND_STATUS_RECOVERY:
+                       case ISTATE_SEND_TEXTRSP:
+                               use_misc = 0;
+                               sent_status = 1;
+                               break;
+                       case ISTATE_SEND_REJECT:
+                               use_misc = 0;
+                               if (cmd->cmd_flags & ICF_REJECT_FAIL_CONN) {
+                                       cmd->cmd_flags &= ~ICF_REJECT_FAIL_CONN;
+                                       spin_unlock_bh(&cmd->istate_lock);
+                                       complete(&cmd->reject_comp);
+                                       goto transport_err;
+                               }
+                               complete(&cmd->reject_comp);
+                               break;
+                       case ISTATE_SEND_TASKMGTRSP:
+                               use_misc = 0;
+                               sent_status = 1;
+                               break;
+                       case ISTATE_SEND_LOGOUTRSP:
+                               spin_unlock_bh(&cmd->istate_lock);
+                               if (!iscsit_logout_post_handler(cmd, conn))
+                                       goto restart;
+                               spin_lock_bh(&cmd->istate_lock);
+                               use_misc = 0;
+                               sent_status = 1;
+                               break;
+                       default:
+                               pr_err("Unknown Opcode: 0x%02x ITT:"
+                                       " 0x%08x, i_state: %d on CID: %hu\n",
+                                       cmd->iscsi_opcode, cmd->init_task_tag,
+                                       cmd->i_state, conn->cid);
+                               spin_unlock_bh(&cmd->istate_lock);
+                               goto transport_err;
+                       }
+
+                       if (sent_status) {
+                               cmd->i_state = ISTATE_SENT_STATUS;
+                               sent_status = 0;
+                       }
+                       spin_unlock_bh(&cmd->istate_lock);
+
+                       if (atomic_read(&conn->check_immediate_queue))
+                               goto get_immediate;
+
+                       goto get_response;
+               } else
+                       conn->tx_response_queue = 0;
+       }
+
+transport_err:
+       iscsit_take_action_for_connection_exit(conn);
+       goto restart;
+out:
+       return 0;
+}
+
+/*
+ * Per-connection RX kthread main loop.  Receives one iSCSI Basic Header
+ * Segment at a time from the initiator, verifies the optional
+ * HeaderDigest CRC32C, and dispatches the PDU to the matching
+ * iscsit_handle_*() routine.  On transport failure the connection-exit
+ * path is taken and the thread restarts on its thread-set; returns 0
+ * once the pre-handler yields no connection.
+ */
+int iscsi_target_rx_thread(void *arg)
+{
+       int ret;
+       u8 buffer[ISCSI_HDR_LEN], opcode;
+       u32 checksum = 0, digest = 0;
+       struct iscsi_conn *conn = NULL;
+       struct iscsi_thread_set *ts = (struct iscsi_thread_set *)arg;
+       struct kvec iov;
+       /*
+        * Allow ourselves to be interrupted by SIGINT so that a
+        * connection recovery / failure event can be triggered externally.
+        */
+       allow_signal(SIGINT);
+
+restart:
+       conn = iscsi_rx_thread_pre_handler(ts);
+       if (!conn)
+               goto out;
+
+       while (!kthread_should_stop()) {
+               /*
+                * Ensure that both TX and RX per connection kthreads
+                * are scheduled to run on the same CPU.
+                */
+               iscsit_thread_check_cpumask(conn, current, 0);
+
+               memset(buffer, 0, ISCSI_HDR_LEN);
+               memset(&iov, 0, sizeof(struct kvec));
+
+               iov.iov_base    = buffer;
+               iov.iov_len     = ISCSI_HDR_LEN;
+
+               /* Receive exactly one 48-byte Basic Header Segment. */
+               ret = rx_data(conn, &iov, 1, ISCSI_HDR_LEN);
+               if (ret != ISCSI_HDR_LEN) {
+                       iscsit_rx_thread_wait_for_tcp(conn);
+                       goto transport_err;
+               }
+
+               /*
+                * Set conn->bad_hdr for use with REJECT PDUs.
+                */
+               memcpy(&conn->bad_hdr, &buffer, ISCSI_HDR_LEN);
+
+               if (conn->conn_ops->HeaderDigest) {
+                       iov.iov_base    = &digest;
+                       iov.iov_len     = ISCSI_CRC_LEN;
+
+                       ret = rx_data(conn, &iov, 1, ISCSI_CRC_LEN);
+                       if (ret != ISCSI_CRC_LEN) {
+                               iscsit_rx_thread_wait_for_tcp(conn);
+                               goto transport_err;
+                       }
+
+                       iscsit_do_crypto_hash_buf(&conn->conn_rx_hash,
+                                       buffer, ISCSI_HDR_LEN,
+                                       0, NULL, (u8 *)&checksum);
+
+                       if (digest != checksum) {
+                               pr_err("HeaderDigest CRC32C failed,"
+                                       " received 0x%08x, computed 0x%08x\n",
+                                       digest, checksum);
+                               /*
+                                * Set the PDU to 0xff so it will intentionally
+                                * hit default in the switch below.
+                                */
+                               memset(buffer, 0xff, ISCSI_HDR_LEN);
+                               spin_lock_bh(&conn->sess->session_stats_lock);
+                               conn->sess->conn_digest_errors++;
+                               spin_unlock_bh(&conn->sess->session_stats_lock);
+                       } else {
+                               pr_debug("Got HeaderDigest CRC32C"
+                                               " 0x%08x\n", checksum);
+                       }
+               }
+
+               if (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT)
+                       goto transport_err;
+
+               opcode = buffer[0] & ISCSI_OPCODE_MASK;
+
+               /*
+                * Discovery sessions (SessionType != 0) may only carry Text
+                * and Logout request PDUs (RFC 3720).  Compare opcodes for
+                * equality: the previous bitwise test
+                * (opcode & ISCSI_OP_TEXT) also accepted any opcode sharing
+                * bit 0x04, e.g. ISCSI_OP_SCSI_DATA_OUT (0x05).
+                */
+               if (conn->sess->sess_ops->SessionType &&
+                   opcode != ISCSI_OP_TEXT && opcode != ISCSI_OP_LOGOUT) {
+                       pr_err("Received illegal iSCSI Opcode: 0x%02x"
+                       " while in Discovery Session, rejecting.\n", opcode);
+                       iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+                                       buffer, conn);
+                       goto transport_err;
+               }
+
+               switch (opcode) {
+               case ISCSI_OP_SCSI_CMD:
+                       if (iscsit_handle_scsi_cmd(conn, buffer) < 0)
+                               goto transport_err;
+                       break;
+               case ISCSI_OP_SCSI_DATA_OUT:
+                       if (iscsit_handle_data_out(conn, buffer) < 0)
+                               goto transport_err;
+                       break;
+               case ISCSI_OP_NOOP_OUT:
+                       if (iscsit_handle_nop_out(conn, buffer) < 0)
+                               goto transport_err;
+                       break;
+               case ISCSI_OP_SCSI_TMFUNC:
+                       if (iscsit_handle_task_mgt_cmd(conn, buffer) < 0)
+                               goto transport_err;
+                       break;
+               case ISCSI_OP_TEXT:
+                       if (iscsit_handle_text_cmd(conn, buffer) < 0)
+                               goto transport_err;
+                       break;
+               case ISCSI_OP_LOGOUT:
+                       ret = iscsit_handle_logout_cmd(conn, buffer);
+                       if (ret > 0) {
+                               /*
+                                * Logout response will be sent by the TX
+                                * thread; wait for it before exiting.
+                                */
+                               wait_for_completion_timeout(&conn->conn_logout_comp,
+                                               SECONDS_FOR_LOGOUT_COMP * HZ);
+                               goto transport_err;
+                       } else if (ret < 0)
+                               goto transport_err;
+                       break;
+               case ISCSI_OP_SNACK:
+                       if (iscsit_handle_snack(conn, buffer) < 0)
+                               goto transport_err;
+                       break;
+               default:
+                       pr_err("Got unknown iSCSI OpCode: 0x%02x\n",
+                                       opcode);
+                       if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+                               pr_err("Cannot recover from unknown"
+                               " opcode while ERL=0, closing iSCSI connection"
+                               ".\n");
+                               goto transport_err;
+                       }
+                       if (!conn->conn_ops->OFMarker) {
+                               pr_err("Unable to recover from unknown"
+                               " opcode while OFMarker=No, closing iSCSI"
+                                       " connection.\n");
+                               goto transport_err;
+                       }
+                       if (iscsit_recover_from_unknown_opcode(conn) < 0) {
+                               pr_err("Unable to recover from unknown"
+                                       " opcode, closing iSCSI connection.\n");
+                               goto transport_err;
+                       }
+                       break;
+               }
+       }
+
+transport_err:
+       /* A pending SIGINT means recovery was requested, not a TCP failure. */
+       if (!signal_pending(current))
+               atomic_set(&conn->transport_failed, 1);
+       iscsit_take_action_for_connection_exit(conn);
+       goto restart;
+out:
+       return 0;
+}
+
+/*
+ * Release every iSCSI command still queued on a dying connection,
+ * bumping MaxCmdSN for each so the initiator's command window is not
+ * permanently narrowed.  Commands known to the SE core are quiesced via
+ * transport_wait_for_tasks(); the rest are freed directly.
+ */
+static void iscsit_release_commands_from_conn(struct iscsi_conn *conn)
+{
+       struct iscsi_cmd *cmd = NULL, *cmd_tmp = NULL;
+       struct iscsi_session *sess = conn->sess;
+       struct se_cmd *se_cmd;
+       /*
+        * We expect this function to only ever be called from either RX or TX
+        * thread context via iscsit_close_connection() once the other context
+        * has been reset -> returned sleeping pre-handler state.
+        */
+       spin_lock_bh(&conn->cmd_lock);
+       list_for_each_entry_safe(cmd, cmd_tmp, &conn->conn_cmd_list, i_list) {
+               /*
+                * Commands never submitted to the SE core (SCF_SE_LUN_CMD
+                * clear).  cmd_lock is dropped around the teardown calls;
+                * that is only safe under the single-caller assumption
+                * documented above.
+                */
+               if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD)) {
+
+                       list_del(&cmd->i_list);
+                       spin_unlock_bh(&conn->cmd_lock);
+                       iscsit_increment_maxcmdsn(cmd, sess);
+                       se_cmd = &cmd->se_cmd;
+                       /*
+                        * Special cases for active iSCSI TMR, and
+                        * transport_lookup_cmd_lun() failing from
+                        * iscsit_get_lun_for_cmd() in iscsit_handle_scsi_cmd().
+                        */
+                       if (cmd->tmr_req && se_cmd->transport_wait_for_tasks)
+                               se_cmd->transport_wait_for_tasks(se_cmd, 1, 1);
+                       /*
+                        * NOTE(review): this branch looks unreachable — the
+                        * enclosing if already established SCF_SE_LUN_CMD is
+                        * clear, so transport_release_cmd() is never called
+                        * here.  Confirm the intended flag/test.
+                        */
+                       else if (cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD)
+                               transport_release_cmd(se_cmd);
+                       else
+                               iscsit_release_cmd(cmd);
+
+                       spin_lock_bh(&conn->cmd_lock);
+                       continue;
+               }
+               /* Command is owned by the SE core: let it drain the I/O. */
+               list_del(&cmd->i_list);
+               spin_unlock_bh(&conn->cmd_lock);
+
+               iscsit_increment_maxcmdsn(cmd, sess);
+               se_cmd = &cmd->se_cmd;
+
+               if (se_cmd->transport_wait_for_tasks)
+                       se_cmd->transport_wait_for_tasks(se_cmd, 1, 1);
+
+               spin_lock_bh(&conn->cmd_lock);
+       }
+       spin_unlock_bh(&conn->cmd_lock);
+}
+
+/*
+ * Stop the Data-Out timer on every outstanding WRITE command queued on
+ * the connection.  Called while tearing the connection down, before the
+ * commands themselves are released or prepared for realligence.
+ */
+static void iscsit_stop_timers_for_cmds(
+       struct iscsi_conn *conn)
+{
+       struct iscsi_cmd *c;
+
+       spin_lock_bh(&conn->cmd_lock);
+       list_for_each_entry(c, &conn->conn_cmd_list, i_list)
+               if (c->data_direction == DMA_TO_DEVICE)
+                       iscsit_stop_dataout_timer(c);
+       spin_unlock_bh(&conn->cmd_lock);
+}
+
+/*
+ * Tear down one iSCSI connection, then decide the session-level outcome:
+ * keep the session running (other connections remain), stop the session,
+ * close it, or park it in FAILED state behind the Time2Retain timer.
+ * Always returns 0.
+ *
+ * Fix vs. previous version: the trailing
+ *     spin_unlock_bh(&sess->conn_lock); return 0;
+ * after the final if/else-if/else chain was unreachable (every branch
+ * returns) and, had it ever run, would have unlocked conn_lock twice.
+ * It has been removed; no other statement changed.
+ */
+int iscsit_close_connection(
+       struct iscsi_conn *conn)
+{
+       int conn_logout = (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT);
+       struct iscsi_session    *sess = conn->sess;
+
+       pr_debug("Closing iSCSI connection CID %hu on SID:"
+               " %u\n", conn->cid, sess->sid);
+       /*
+        * Always up conn_logout_comp just in case the RX Thread is sleeping
+        * and the logout response never got sent because the connection
+        * failed.
+        */
+       complete(&conn->conn_logout_comp);
+
+       iscsi_release_thread_set(conn);
+
+       /* Quiesce all per-connection timers and queued requests. */
+       iscsit_stop_timers_for_cmds(conn);
+       iscsit_stop_nopin_response_timer(conn);
+       iscsit_stop_nopin_timer(conn);
+       iscsit_free_queue_reqs_for_conn(conn);
+
+       /*
+        * During Connection recovery drop unacknowledged out of order
+        * commands for this connection, and prepare the other commands
+        * for realligence.
+        *
+        * During normal operation clear the out of order commands (but
+        * do not free the struct iscsi_ooo_cmdsn's) and release all
+        * struct iscsi_cmds.
+        */
+       if (atomic_read(&conn->connection_recovery)) {
+               iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(conn);
+               iscsit_prepare_cmds_for_realligance(conn);
+       } else {
+               iscsit_clear_ooo_cmdsns_for_conn(conn);
+               iscsit_release_commands_from_conn(conn);
+       }
+
+       /*
+        * Handle decrementing session or connection usage count if
+        * a logout response was not able to be sent because the
+        * connection failed.  Fall back to Session Recovery here.
+        */
+       if (atomic_read(&conn->conn_logout_remove)) {
+               if (conn->conn_logout_reason == ISCSI_LOGOUT_REASON_CLOSE_SESSION) {
+                       iscsit_dec_conn_usage_count(conn);
+                       iscsit_dec_session_usage_count(sess);
+               }
+               if (conn->conn_logout_reason == ISCSI_LOGOUT_REASON_CLOSE_CONNECTION)
+                       iscsit_dec_conn_usage_count(conn);
+
+               atomic_set(&conn->conn_logout_remove, 0);
+               atomic_set(&sess->session_reinstatement, 0);
+               atomic_set(&sess->session_fall_back_to_erl0, 1);
+       }
+
+       spin_lock_bh(&sess->conn_lock);
+       list_del(&conn->conn_list);
+
+       /*
+        * Attempt to let the Initiator know this connection failed by
+        * sending an Connection Dropped Async Message on another
+        * active connection.
+        */
+       if (atomic_read(&conn->connection_recovery))
+               iscsit_build_conn_drop_async_message(conn);
+
+       spin_unlock_bh(&sess->conn_lock);
+
+       /*
+        * If connection reinstatement is being performed on this connection,
+        * up the connection reinstatement semaphore that is being blocked on
+        * in iscsit_cause_connection_reinstatement().
+        */
+       spin_lock_bh(&conn->state_lock);
+       if (atomic_read(&conn->sleep_on_conn_wait_comp)) {
+               spin_unlock_bh(&conn->state_lock);
+               complete(&conn->conn_wait_comp);
+               wait_for_completion(&conn->conn_post_wait_comp);
+               spin_lock_bh(&conn->state_lock);
+       }
+
+       /*
+        * If connection reinstatement is being performed on this connection
+        * by receiving a REMOVECONNFORRECOVERY logout request, up the
+        * connection wait rcfr semaphore that is being blocked on
+        * an iscsit_connection_reinstatement_rcfr().
+        */
+       if (atomic_read(&conn->connection_wait_rcfr)) {
+               spin_unlock_bh(&conn->state_lock);
+               complete(&conn->conn_wait_rcfr_comp);
+               wait_for_completion(&conn->conn_post_wait_comp);
+               spin_lock_bh(&conn->state_lock);
+       }
+       atomic_set(&conn->connection_reinstatement, 1);
+       spin_unlock_bh(&conn->state_lock);
+
+       /*
+        * If any other processes are accessing this connection pointer we
+        * must wait until they have completed.
+        */
+       iscsit_check_conn_usage_count(conn);
+
+       /* Release RX/TX digest transforms and the CPU affinity mask. */
+       if (conn->conn_rx_hash.tfm)
+               crypto_free_hash(conn->conn_rx_hash.tfm);
+       if (conn->conn_tx_hash.tfm)
+               crypto_free_hash(conn->conn_tx_hash.tfm);
+
+       if (conn->conn_cpumask)
+               free_cpumask_var(conn->conn_cpumask);
+
+       kfree(conn->conn_ops);
+       conn->conn_ops = NULL;
+
+       if (conn->sock) {
+               if (conn->conn_flags & CONNFLAG_SCTP_STRUCT_FILE) {
+                       kfree(conn->sock->file);
+                       conn->sock->file = NULL;
+               }
+               sock_release(conn->sock);
+       }
+       conn->thread_set = NULL;
+
+       pr_debug("Moving to TARG_CONN_STATE_FREE.\n");
+       conn->conn_state = TARG_CONN_STATE_FREE;
+       kfree(conn);
+
+       /* conn is gone from here on; only sess may be touched. */
+       spin_lock_bh(&sess->conn_lock);
+       atomic_dec(&sess->nconn);
+       pr_debug("Decremented iSCSI connection count to %hu from node:"
+               " %s\n", atomic_read(&sess->nconn),
+               sess->sess_ops->InitiatorName);
+       /*
+        * Make sure that if one connection fails in an non ERL=2 iSCSI
+        * Session that they all fail.
+        */
+       if ((sess->sess_ops->ErrorRecoveryLevel != 2) && !conn_logout &&
+            !atomic_read(&sess->session_logout))
+               atomic_set(&sess->session_fall_back_to_erl0, 1);
+
+       /*
+        * If this was not the last connection in the session, and we are
+        * performing session reinstatement or falling back to ERL=0, call
+        * iscsit_stop_session() without sleeping to shutdown the other
+        * active connections.
+        */
+       if (atomic_read(&sess->nconn)) {
+               if (!atomic_read(&sess->session_reinstatement) &&
+                   !atomic_read(&sess->session_fall_back_to_erl0)) {
+                       spin_unlock_bh(&sess->conn_lock);
+                       return 0;
+               }
+               if (!atomic_read(&sess->session_stop_active)) {
+                       atomic_set(&sess->session_stop_active, 1);
+                       spin_unlock_bh(&sess->conn_lock);
+                       iscsit_stop_session(sess, 0, 0);
+                       return 0;
+               }
+               spin_unlock_bh(&sess->conn_lock);
+               return 0;
+       }
+
+       /*
+        * If this was the last connection in the session and one of the
+        * following is occurring:
+        *
+        * Session Reinstatement is not being performed, and are falling back
+        * to ERL=0 call iscsit_close_session().
+        *
+        * Session Logout was requested.  iscsit_close_session() will be called
+        * elsewhere.
+        *
+        * Session Continuation is not being performed, start the Time2Retain
+        * handler and check if sleep_on_sess_wait_sem is active.
+        */
+       if (!atomic_read(&sess->session_reinstatement) &&
+            atomic_read(&sess->session_fall_back_to_erl0)) {
+               spin_unlock_bh(&sess->conn_lock);
+               iscsit_close_session(sess);
+
+               return 0;
+       } else if (atomic_read(&sess->session_logout)) {
+               pr_debug("Moving to TARG_SESS_STATE_FREE.\n");
+               sess->session_state = TARG_SESS_STATE_FREE;
+               spin_unlock_bh(&sess->conn_lock);
+
+               if (atomic_read(&sess->sleep_on_sess_wait_comp))
+                       complete(&sess->session_wait_comp);
+
+               return 0;
+       } else {
+               pr_debug("Moving to TARG_SESS_STATE_FAILED.\n");
+               sess->session_state = TARG_SESS_STATE_FAILED;
+
+               if (!atomic_read(&sess->session_continuation)) {
+                       spin_unlock_bh(&sess->conn_lock);
+                       iscsit_start_time2retain_handler(sess);
+               } else
+                       spin_unlock_bh(&sess->conn_lock);
+
+               if (atomic_read(&sess->sleep_on_sess_wait_comp))
+                       complete(&sess->session_wait_comp);
+
+               return 0;
+       }
+}
+
+/*
+ * Final teardown of an iSCSI session.  BUGs if any connection still
+ * exists.  Deregisters the session from the SE transport core, frees
+ * ERL=2 recovery entries and OOO CmdSN state, removes the session from
+ * the global session IDR, and frees it.  When called from timer
+ * context with an active usage count it instead restarts the
+ * Time2Retain handler and returns early.  Always returns 0.
+ */
+int iscsit_close_session(struct iscsi_session *sess)
+{
+       struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess);
+       struct se_portal_group *se_tpg = &tpg->tpg_se_tpg;
+
+       /* Every connection must already be gone by the time we get here. */
+       if (atomic_read(&sess->nconn)) {
+               pr_err("%d connection(s) still exist for iSCSI session"
+                       " to %s\n", atomic_read(&sess->nconn),
+                       sess->sess_ops->InitiatorName);
+               BUG();
+       }
+
+       /* Mark logout + reinstatement before stopping Time2Retain. */
+       spin_lock_bh(&se_tpg->session_lock);
+       atomic_set(&sess->session_logout, 1);
+       atomic_set(&sess->session_reinstatement, 1);
+       iscsit_stop_time2retain_timer(sess);
+       spin_unlock_bh(&se_tpg->session_lock);
+
+       /*
+        * transport_deregister_session_configfs() will clear the
+        * struct se_node_acl->nacl_sess pointer now as a iscsi_np process context
+        * can be setting it again with __transport_register_session() in
+        * iscsi_post_login_handler() again after the iscsit_stop_session()
+        * completes in iscsi_np context.
+        */
+       transport_deregister_session_configfs(sess->se_sess);
+
+       /*
+        * If any other processes are accessing this session pointer we must
+        * wait until they have completed.  If we are in an interrupt (the
+        * time2retain handler) and contain and active session usage count we
+        * restart the timer and exit.
+        */
+       if (!in_interrupt()) {
+               if (iscsit_check_session_usage_count(sess) == 1)
+                       iscsit_stop_session(sess, 1, 1);
+       } else {
+               if (iscsit_check_session_usage_count(sess) == 2) {
+                       /* Usage count still active: retry via Time2Retain. */
+                       atomic_set(&sess->session_logout, 0);
+                       iscsit_start_time2retain_handler(sess);
+                       return 0;
+               }
+       }
+
+       transport_deregister_session(sess->se_sess);
+
+       if (sess->sess_ops->ErrorRecoveryLevel == 2)
+               iscsit_free_connection_recovery_entires(sess);
+
+       iscsit_free_all_ooo_cmdsns(sess);
+
+       spin_lock_bh(&se_tpg->session_lock);
+       pr_debug("Moving to TARG_SESS_STATE_FREE.\n");
+       sess->session_state = TARG_SESS_STATE_FREE;
+       pr_debug("Released iSCSI session from node: %s\n",
+                       sess->sess_ops->InitiatorName);
+       tpg->nsessions--;
+       if (tpg->tpg_tiqn)
+               tpg->tpg_tiqn->tiqn_nsessions--;
+
+       pr_debug("Decremented number of active iSCSI Sessions on"
+               " iSCSI TPG: %hu to %u\n", tpg->tpgt, tpg->nsessions);
+
+       /* Drop the session from the global session index. */
+       spin_lock(&sess_idr_lock);
+       idr_remove(&sess_idr, sess->session_index);
+       spin_unlock(&sess_idr_lock);
+
+       kfree(sess->sess_ops);
+       sess->sess_ops = NULL;
+       spin_unlock_bh(&se_tpg->session_lock);
+
+       kfree(sess);
+       return 0;
+}
+
+/*
+ * Logout post-handler for REASON_CLOSE_SESSION: arm the TX thread to
+ * clear itself, ack the logout, then stop every connection and release
+ * the whole session.  sess is cached up front — presumably because conn
+ * may not outlive iscsit_stop_session(); confirm before reordering.
+ * The call sequence is strictly ordered against concurrent RX/TX
+ * thread teardown; do not rearrange.
+ */
+static void iscsit_logout_post_handler_closesession(
+       struct iscsi_conn *conn)
+{
+       struct iscsi_session *sess = conn->sess;
+
+       iscsi_set_thread_clear(conn, ISCSI_CLEAR_TX_THREAD);
+       iscsi_set_thread_set_signal(conn, ISCSI_SIGNAL_TX_THREAD);
+
+       atomic_set(&conn->conn_logout_remove, 0);
+       complete(&conn->conn_logout_comp);
+
+       iscsit_dec_conn_usage_count(conn);
+       iscsit_stop_session(sess, 1, 1);
+       iscsit_dec_session_usage_count(sess);
+       iscsit_close_session(sess);
+}
+
+/*
+ * Logout post-handler for REASON_CLOSE_CONNECTION when the logout CID
+ * matches the connection the Logout arrived on: arm/signal the TX
+ * thread, ack the logout, then cause reinstatement of this same
+ * connection.  Strictly ordered; do not rearrange.
+ */
+static void iscsit_logout_post_handler_samecid(
+       struct iscsi_conn *conn)
+{
+       iscsi_set_thread_clear(conn, ISCSI_CLEAR_TX_THREAD);
+       iscsi_set_thread_set_signal(conn, ISCSI_SIGNAL_TX_THREAD);
+
+       atomic_set(&conn->conn_logout_remove, 0);
+       complete(&conn->conn_logout_comp);
+
+       iscsit_cause_connection_reinstatement(conn, 1);
+       iscsit_dec_conn_usage_count(conn);
+}
+
+/*
+ * Logout post-handler for REASON_CLOSE_CONNECTION where the logout
+ * targets a different CID than the connection the Logout arrived on:
+ * find that connection in the session, shut down its socket, and cause
+ * connection reinstatement on it.
+ *
+ * Fix vs. previous version: after a full list_for_each_entry()
+ * iteration the cursor is NOT NULL — it points at the list-head
+ * container — so the old "if (!l_conn) return;" never fired and a
+ * missing CID led to dereferencing an invalid pointer.  Track the
+ * lookup result explicitly instead.
+ */
+static void iscsit_logout_post_handler_diffcid(
+       struct iscsi_conn *conn,
+       u16 cid)
+{
+       struct iscsi_conn *l_conn;
+       struct iscsi_session *sess = conn->sess;
+       int conn_found = 0;
+
+       if (!sess)
+               return;
+
+       spin_lock_bh(&sess->conn_lock);
+       list_for_each_entry(l_conn, &sess->sess_conn_list, conn_list) {
+               if (l_conn->cid == cid) {
+                       /* Pin the connection before dropping conn_lock. */
+                       iscsit_inc_conn_usage_count(l_conn);
+                       conn_found = 1;
+                       break;
+               }
+       }
+       spin_unlock_bh(&sess->conn_lock);
+
+       if (!conn_found)
+               return;
+
+       if (l_conn->sock)
+               l_conn->sock->ops->shutdown(l_conn->sock, RCV_SHUTDOWN);
+
+       spin_lock_bh(&l_conn->state_lock);
+       pr_debug("Moving to TARG_CONN_STATE_IN_LOGOUT.\n");
+       l_conn->conn_state = TARG_CONN_STATE_IN_LOGOUT;
+       spin_unlock_bh(&l_conn->state_lock);
+
+       iscsit_cause_connection_reinstatement(l_conn, 1);
+       iscsit_dec_conn_usage_count(l_conn);
+}
+
+/*
+ *     Return of 0 causes the TX thread to restart.
+ */
+static int iscsit_logout_post_handler(
+       struct iscsi_cmd *cmd,
+       struct iscsi_conn *conn)
+{
+       int ret = 0;
+
+       switch (cmd->logout_reason) {
+       case ISCSI_LOGOUT_REASON_CLOSE_SESSION:
+               switch (cmd->logout_response) {
+               case ISCSI_LOGOUT_SUCCESS:
+               case ISCSI_LOGOUT_CLEANUP_FAILED:
+               default:
+                       iscsit_logout_post_handler_closesession(conn);
+                       break;
+               }
+               ret = 0;
+               break;
+       case ISCSI_LOGOUT_REASON_CLOSE_CONNECTION:
+               if (conn->cid == cmd->logout_cid) {
+                       switch (cmd->logout_response) {
+                       case ISCSI_LOGOUT_SUCCESS:
+                       case ISCSI_LOGOUT_CLEANUP_FAILED:
+                       default:
+                               iscsit_logout_post_handler_samecid(conn);
+                               break;
+                       }
+                       ret = 0;
+               } else {
+                       switch (cmd->logout_response) {
+                       case ISCSI_LOGOUT_SUCCESS:
+                               iscsit_logout_post_handler_diffcid(conn,
+                                       cmd->logout_cid);
+                               break;
+                       case ISCSI_LOGOUT_CID_NOT_FOUND:
+                       case ISCSI_LOGOUT_CLEANUP_FAILED:
+                       default:
+                               break;
+                       }
+                       ret = 1;
+               }
+               break;
+       case ISCSI_LOGOUT_REASON_RECOVERY:
+               switch (cmd->logout_response) {
+               case ISCSI_LOGOUT_SUCCESS:
+               case ISCSI_LOGOUT_CID_NOT_FOUND:
+               case ISCSI_LOGOUT_RECOVERY_UNSUPPORTED:
+               case ISCSI_LOGOUT_CLEANUP_FAILED:
+               default:
+                       break;
+               }
+               ret = 1;
+               break;
+       default:
+               break;
+
+       }
+       return ret;
+}
+
/*
 * Move every connection of @sess into TARG_CONN_STATE_CLEANUP_WAIT and
 * mark the session itself as TARG_SESS_STATE_FAILED.
 */
void iscsit_fail_session(struct iscsi_session *sess)
{
	struct iscsi_conn *conn;

	spin_lock_bh(&sess->conn_lock);
	list_for_each_entry(conn, &sess->sess_conn_list, conn_list) {
		pr_debug("Moving to TARG_CONN_STATE_CLEANUP_WAIT.\n");
		conn->conn_state = TARG_CONN_STATE_CLEANUP_WAIT;
	}
	spin_unlock_bh(&sess->conn_lock);

	pr_debug("Moving to TARG_SESS_STATE_FAILED.\n");
	/* NOTE(review): session_state is written after conn_lock is dropped. */
	sess->session_state = TARG_SESS_STATE_FAILED;
}
+
/*
 * Force reinstatement (teardown) of every connection in @sess, wait for
 * the final connection teardown to complete session_wait_comp, then
 * close the session.  Always returns 0.
 */
int iscsit_free_session(struct iscsi_session *sess)
{
	u16 conn_count = atomic_read(&sess->nconn);
	struct iscsi_conn *conn, *conn_tmp = NULL;
	int is_last;

	spin_lock_bh(&sess->conn_lock);
	/* Request that the last connection teardown signal session_wait_comp. */
	atomic_set(&sess->sleep_on_sess_wait_comp, 1);

	list_for_each_entry_safe(conn, conn_tmp, &sess->sess_conn_list,
			conn_list) {
		if (conn_count == 0)
			break;

		if (list_is_last(&conn->conn_list, &sess->sess_conn_list)) {
			is_last = 1;
		} else {
			/*
			 * Also pin the next entry: conn_lock is dropped
			 * below and conn_tmp could otherwise be freed from
			 * under the safe-iteration cursor.
			 */
			iscsit_inc_conn_usage_count(conn_tmp);
			is_last = 0;
		}
		iscsit_inc_conn_usage_count(conn);

		/* Reinstatement may block, so it cannot run under conn_lock. */
		spin_unlock_bh(&sess->conn_lock);
		iscsit_cause_connection_reinstatement(conn, 1);
		spin_lock_bh(&sess->conn_lock);

		iscsit_dec_conn_usage_count(conn);
		if (is_last == 0)
			iscsit_dec_conn_usage_count(conn_tmp);

		conn_count--;
	}

	if (atomic_read(&sess->nconn)) {
		spin_unlock_bh(&sess->conn_lock);
		/* Wait until the final connection teardown completes us. */
		wait_for_completion(&sess->session_wait_comp);
	} else
		spin_unlock_bh(&sess->conn_lock);

	iscsit_close_session(sess);
	return 0;
}
+
/*
 * Stop all connections belonging to @sess.
 *
 * @session_sleep: when non-zero, wait on sess->session_wait_comp until
 *	the final connection teardown signals it.
 * @connection_sleep: when non-zero, perform each connection's
 *	reinstatement synchronously (may sleep); otherwise just signal
 *	reinstatement without sleeping.
 */
void iscsit_stop_session(
	struct iscsi_session *sess,
	int session_sleep,
	int connection_sleep)
{
	u16 conn_count = atomic_read(&sess->nconn);
	struct iscsi_conn *conn, *conn_tmp = NULL;
	int is_last;

	spin_lock_bh(&sess->conn_lock);
	if (session_sleep)
		atomic_set(&sess->sleep_on_sess_wait_comp, 1);

	if (connection_sleep) {
		list_for_each_entry_safe(conn, conn_tmp, &sess->sess_conn_list,
				conn_list) {
			if (conn_count == 0)
				break;

			if (list_is_last(&conn->conn_list, &sess->sess_conn_list)) {
				is_last = 1;
			} else {
				/*
				 * Pin the next entry too; conn_lock is
				 * dropped below and conn_tmp could
				 * otherwise be freed under the cursor.
				 */
				iscsit_inc_conn_usage_count(conn_tmp);
				is_last = 0;
			}
			iscsit_inc_conn_usage_count(conn);

			/* Synchronous reinstatement may block; drop the lock. */
			spin_unlock_bh(&sess->conn_lock);
			iscsit_cause_connection_reinstatement(conn, 1);
			spin_lock_bh(&sess->conn_lock);

			iscsit_dec_conn_usage_count(conn);
			if (is_last == 0)
				iscsit_dec_conn_usage_count(conn_tmp);
			conn_count--;
		}
	} else {
		list_for_each_entry(conn, &sess->sess_conn_list, conn_list)
			iscsit_cause_connection_reinstatement(conn, 0);
	}

	if (session_sleep && atomic_read(&sess->nconn)) {
		spin_unlock_bh(&sess->conn_lock);
		wait_for_completion(&sess->session_wait_comp);
	} else
		spin_unlock_bh(&sess->conn_lock);
}
+
/*
 * Release every iSCSI session attached to @tpg.
 *
 * Returns -1 without touching any session when sessions exist and
 * @force is not set; otherwise frees eligible sessions and returns 0.
 */
int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *tpg, int force)
{
	struct iscsi_session *sess;
	struct se_portal_group *se_tpg = &tpg->tpg_se_tpg;
	struct se_session *se_sess, *se_sess_tmp;
	int session_count = 0;

	spin_lock_bh(&se_tpg->session_lock);
	if (tpg->nsessions && !force) {
		spin_unlock_bh(&se_tpg->session_lock);
		return -1;
	}

	list_for_each_entry_safe(se_sess, se_sess_tmp, &se_tpg->tpg_sess_list,
			sess_list) {
		sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;

		spin_lock(&sess->conn_lock);
		/*
		 * Skip sessions already being torn down: ERL0 fallback in
		 * progress, explicit logout, or expired Time2Retain timer.
		 */
		if (atomic_read(&sess->session_fall_back_to_erl0) ||
		    atomic_read(&sess->session_logout) ||
		    (sess->time2retain_timer_flags & ISCSI_TF_EXPIRED)) {
			spin_unlock(&sess->conn_lock);
			continue;
		}
		atomic_set(&sess->session_reinstatement, 1);
		spin_unlock(&sess->conn_lock);
		/*
		 * iscsit_free_session() blocks, so drop the TPG session
		 * lock around the call and retake it afterwards.
		 */
		spin_unlock_bh(&se_tpg->session_lock);

		iscsit_free_session(sess);
		spin_lock_bh(&se_tpg->session_lock);

		session_count++;
	}
	spin_unlock_bh(&se_tpg->session_lock);

	pr_debug("Released %d iSCSI Session(s) from Target Portal"
			" Group: %hu\n", session_count, tpg->tpgt);
	return 0;
}
+
/* Module metadata and entry/exit hooks for the LIO iSCSI target driver. */
MODULE_DESCRIPTION("iSCSI-Target Driver for mainline target infrastructure");
MODULE_VERSION("4.1.x");
MODULE_AUTHOR("nab@Linux-iSCSI.org");
MODULE_LICENSE("GPL");

module_init(iscsi_target_init_module);
module_exit(iscsi_target_cleanup_module);
diff --git a/drivers/target/iscsi/iscsi_target.h b/drivers/target/iscsi/iscsi_target.h
new file mode 100644 (file)
index 0000000..5db2dde
--- /dev/null
@@ -0,0 +1,42 @@
#ifndef ISCSI_TARGET_H
#define ISCSI_TARGET_H

/* TIQN (iSCSI Target Node name) registration and lookup. */
extern struct iscsi_tiqn *iscsit_get_tiqn_for_login(unsigned char *);
extern struct iscsi_tiqn *iscsit_get_tiqn(unsigned char *, int);
extern void iscsit_put_tiqn_for_login(struct iscsi_tiqn *);
extern struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *);
extern void iscsit_del_tiqn(struct iscsi_tiqn *);

/* Network portal (iscsi_np) management. */
extern int iscsit_access_np(struct iscsi_np *, struct iscsi_portal_group *);
extern int iscsit_deaccess_np(struct iscsi_np *, struct iscsi_portal_group *);
extern struct iscsi_np *iscsit_add_np(struct __kernel_sockaddr_storage *,
				char *, int);
extern int iscsit_reset_np_thread(struct iscsi_np *, struct iscsi_tpg_np *,
				struct iscsi_portal_group *);
extern int iscsit_del_np(struct iscsi_np *);

/* PDU construction and logout handling. */
extern int iscsit_add_reject_from_cmd(u8, int, int, unsigned char *, struct iscsi_cmd *);
extern int iscsit_logout_closesession(struct iscsi_cmd *, struct iscsi_conn *);
extern int iscsit_logout_closeconnection(struct iscsi_cmd *, struct iscsi_conn *);
extern int iscsit_logout_removeconnforrecovery(struct iscsi_cmd *, struct iscsi_conn *);
extern int iscsit_send_async_msg(struct iscsi_conn *, u16, u8, u8);
extern int iscsit_send_r2t(struct iscsi_cmd *, struct iscsi_conn *);
extern int iscsit_build_r2ts_for_cmd(struct iscsi_cmd *, struct iscsi_conn *, int);

/* RX/TX kernel threads and connection/session teardown. */
extern void iscsit_thread_get_cpumask(struct iscsi_conn *);
extern int iscsi_target_tx_thread(void *);
extern int iscsi_target_rx_thread(void *);
extern int iscsit_close_connection(struct iscsi_conn *);
extern int iscsit_close_session(struct iscsi_session *);
extern void iscsit_fail_session(struct iscsi_session *);
extern int iscsit_free_session(struct iscsi_session *);
extern void iscsit_stop_session(struct iscsi_session *, int, int);
extern int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *, int);

/* Module-wide globals. */
extern struct iscsit_global *iscsit_global;
extern struct target_fabric_configfs *lio_target_fabric_configfs;

/* Slab caches used for per-command and recovery allocations. */
extern struct kmem_cache *lio_dr_cache;
extern struct kmem_cache *lio_ooo_cache;
extern struct kmem_cache *lio_cmd_cache;
extern struct kmem_cache *lio_qr_cache;
extern struct kmem_cache *lio_r2t_cache;

#endif   /*** ISCSI_TARGET_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c
new file mode 100644 (file)
index 0000000..11fd743
--- /dev/null
@@ -0,0 +1,490 @@
+/*******************************************************************************
+ * This file houses the main functions for the iSCSI CHAP support
+ *
 * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/string.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <linux/scatterlist.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_nego.h"
+#include "iscsi_target_auth.h"
+
/*
 * Convert a two-character ASCII hex pair (e.g. "a5") into the byte it
 * encodes.  Characters outside [0-9a-fA-F] are treated as digits, as in
 * the original implementation (no validation is performed here).
 */
static unsigned char chap_asciihex_to_binaryhex(unsigned char val[2])
{
	unsigned char nibble[2];
	int i;

	for (i = 0; i < 2; i++) {
		unsigned char c = val[i];

		if (c >= 'a' && c <= 'f')
			nibble[i] = (c - 'a' + 10) & 0xf;
		else if (c >= 'A' && c <= 'F')
			nibble[i] = (c - 'A' + 10) & 0xf;
		else /* digit */
			nibble[i] = (c - '0') & 0xf;
	}

	return (nibble[0] << 4) | nibble[1];
}
+
/*
 * Decode an ASCII-hex string @src of @len characters into raw bytes in
 * @dst.  @dst is NUL-terminated; returns the number of bytes written
 * (excluding the terminator).
 */
static int chap_string_to_hex(unsigned char *dst, unsigned char *src, int len)
{
	int in, out = 0;

	for (in = 0; in < len; in += 2)
		dst[out++] = chap_asciihex_to_binaryhex(&src[in]);

	dst[out] = '\0';
	return out;
}
+
/*
 * Encode @src_len raw bytes from @src as lowercase ASCII hex in @dst.
 * @dst must hold at least src_len * 2 + 1 bytes (sprintf NUL-terminates).
 */
static void chap_binaryhex_to_asciihex(char *dst, char *src, int src_len)
{
	char *out = dst;
	int i;

	for (i = 0; i < src_len; i++, out += 2)
		sprintf(out, "%02x", src[i] & 0xff);
}
+
/*
 * Fill @data with @length random bytes.  Each output byte is assembled
 * from three separate get_random_bytes() draws, folded down to
 * 3 + 3 + 2 bits respectively.
 */
static void chap_set_random(char *data, int length)
{
	long r;
	unsigned n;

	while (length > 0) {
		/* Top 3 bits of the output byte. */
		get_random_bytes(&r, sizeof(long));
		r = r ^ (r >> 8);
		r = r ^ (r >> 4);
		n = r & 0x7;

		/* Middle 3 bits. */
		get_random_bytes(&r, sizeof(long));
		r = r ^ (r >> 8);
		r = r ^ (r >> 5);
		n = (n << 3) | (r & 0x7);

		/* Low 2 bits. */
		get_random_bytes(&r, sizeof(long));
		r = r ^ (r >> 8);
		r = r ^ (r >> 5);
		n = (n << 2) | (r & 0x3);

		*data++ = n;
		length--;
	}
}
+
/*
 * Generate a fresh random CHAP challenge for @conn and append it to the
 * outgoing key/value buffer as "CHAP_C=0x<hex>".
 *
 * @caller: non-zero for the server side; used only in the debug message.
 * @c_str/@c_len: output buffer and running length.  *c_len is advanced
 *	past the appended pair plus one for its NUL separator.
 */
static void chap_gen_challenge(
	struct iscsi_conn *conn,
	int caller,
	char *c_str,
	unsigned int *c_len)
{
	unsigned char challenge_asciihex[CHAP_CHALLENGE_LENGTH * 2 + 1];
	struct iscsi_chap *chap = (struct iscsi_chap *) conn->auth_protocol;

	memset(challenge_asciihex, 0, CHAP_CHALLENGE_LENGTH * 2 + 1);

	chap_set_random(chap->challenge, CHAP_CHALLENGE_LENGTH);
	chap_binaryhex_to_asciihex(challenge_asciihex, chap->challenge,
				CHAP_CHALLENGE_LENGTH);
	/*
	 * Set CHAP_C, and copy the generated challenge into c_str.
	 */
	*c_len += sprintf(c_str + *c_len, "CHAP_C=0x%s", challenge_asciihex);
	/* Account for the NUL separating key=value pairs. */
	*c_len += 1;

	pr_debug("[%s] Sending CHAP_C=0x%s\n\n", (caller) ? "server" : "client",
			challenge_asciihex);
}
+
+
+static struct iscsi_chap *chap_server_open(
+       struct iscsi_conn *conn,
+       struct iscsi_node_auth *auth,
+       const char *a_str,
+       char *aic_str,
+       unsigned int *aic_len)
+{
+       struct iscsi_chap *chap;
+
+       if (!(auth->naf_flags & NAF_USERID_SET) ||
+           !(auth->naf_flags & NAF_PASSWORD_SET)) {
+               pr_err("CHAP user or password not set for"
+                               " Initiator ACL\n");
+               return NULL;
+       }
+
+       conn->auth_protocol = kzalloc(sizeof(struct iscsi_chap), GFP_KERNEL);
+       if (!conn->auth_protocol)
+               return NULL;
+
+       chap = (struct iscsi_chap *) conn->auth_protocol;
+       /*
+        * We only support MD5 MDA presently.
+        */
+       if (strncmp(a_str, "CHAP_A=5", 8)) {
+               pr_err("CHAP_A is not MD5.\n");
+               return NULL;
+       }
+       pr_debug("[server] Got CHAP_A=5\n");
+       /*
+        * Send back CHAP_A set to MD5.
+        */
+       *aic_len = sprintf(aic_str, "CHAP_A=5");
+       *aic_len += 1;
+       chap->digest_type = CHAP_DIGEST_MD5;
+       pr_debug("[server] Sending CHAP_A=%d\n", chap->digest_type);
+       /*
+        * Set Identifier.
+        */
+       chap->id = ISCSI_TPG_C(conn)->tpg_chap_id++;
+       *aic_len += sprintf(aic_str + *aic_len, "CHAP_I=%d", chap->id);
+       *aic_len += 1;
+       pr_debug("[server] Sending CHAP_I=%d\n", chap->id);
+       /*
+        * Generate Challenge.
+        */
+       chap_gen_challenge(conn, 1, aic_str, aic_len);
+
+       return chap;
+}
+
/* Release the per-connection CHAP state allocated by chap_server_open(). */
static void chap_close(struct iscsi_conn *conn)
{
	kfree(conn->auth_protocol);
	conn->auth_protocol = NULL;
}
+
/*
 * Verify the initiator's CHAP_N/CHAP_R response against the locally
 * computed MD5 digest and, when mutual authentication is configured,
 * answer the initiator's CHAP_I/CHAP_C with our own CHAP_N/CHAP_R.
 *
 * @nr_in_ptr: incoming key=value text from the initiator.
 * @nr_out_ptr/@nr_out_len: outgoing buffer / running length for the
 *	mutual-authentication response (untouched unless mutual auth
 *	is enabled).
 *
 * Returns 0 on successful authentication, -1 on any failure.
 */
static int chap_server_compute_md5(
	struct iscsi_conn *conn,
	struct iscsi_node_auth *auth,
	char *nr_in_ptr,
	char *nr_out_ptr,
	unsigned int *nr_out_len)
{
	char *endptr;
	unsigned char id, digest[MD5_SIGNATURE_SIZE];
	unsigned char type, response[MD5_SIGNATURE_SIZE * 2 + 2];
	unsigned char identifier[10], *challenge = NULL;
	unsigned char *challenge_binhex = NULL;
	unsigned char client_digest[MD5_SIGNATURE_SIZE];
	unsigned char server_digest[MD5_SIGNATURE_SIZE];
	unsigned char chap_n[MAX_CHAP_N_SIZE], chap_r[MAX_RESPONSE_LENGTH];
	struct iscsi_chap *chap = (struct iscsi_chap *) conn->auth_protocol;
	struct crypto_hash *tfm;
	struct hash_desc desc;
	struct scatterlist sg;
	int auth_ret = -1, ret, challenge_len;

	memset(identifier, 0, 10);
	memset(chap_n, 0, MAX_CHAP_N_SIZE);
	memset(chap_r, 0, MAX_RESPONSE_LENGTH);
	memset(digest, 0, MD5_SIGNATURE_SIZE);
	memset(response, 0, MD5_SIGNATURE_SIZE * 2 + 2);
	memset(client_digest, 0, MD5_SIGNATURE_SIZE);
	memset(server_digest, 0, MD5_SIGNATURE_SIZE);

	challenge = kzalloc(CHAP_CHALLENGE_STR_LEN, GFP_KERNEL);
	if (!challenge) {
		pr_err("Unable to allocate challenge buffer\n");
		goto out;
	}

	challenge_binhex = kzalloc(CHAP_CHALLENGE_STR_LEN, GFP_KERNEL);
	if (!challenge_binhex) {
		pr_err("Unable to allocate challenge_binhex buffer\n");
		goto out;
	}
	/*
	 * Extract CHAP_N.
	 */
	if (extract_param(nr_in_ptr, "CHAP_N", MAX_CHAP_N_SIZE, chap_n,
				&type) < 0) {
		pr_err("Could not find CHAP_N.\n");
		goto out;
	}
	/* CHAP_N is a plain name; a HEX-encoded value is a protocol error. */
	if (type == HEX) {
		pr_err("Could not find CHAP_N.\n");
		goto out;
	}

	if (memcmp(chap_n, auth->userid, strlen(auth->userid)) != 0) {
		pr_err("CHAP_N values do not match!\n");
		goto out;
	}
	pr_debug("[server] Got CHAP_N=%s\n", chap_n);
	/*
	 * Extract CHAP_R.
	 */
	if (extract_param(nr_in_ptr, "CHAP_R", MAX_RESPONSE_LENGTH, chap_r,
				&type) < 0) {
		pr_err("Could not find CHAP_R.\n");
		goto out;
	}
	/* CHAP_R must be HEX-encoded (the MD5 digest in ASCII hex). */
	if (type != HEX) {
		pr_err("Could not find CHAP_R.\n");
		goto out;
	}

	pr_debug("[server] Got CHAP_R=%s\n", chap_r);
	/* Decode the initiator's ASCII-hex digest into raw bytes. */
	chap_string_to_hex(client_digest, chap_r, strlen(chap_r));

	tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm)) {
		pr_err("Unable to allocate struct crypto_hash\n");
		goto out;
	}
	desc.tfm = tfm;
	desc.flags = 0;

	ret = crypto_hash_init(&desc);
	if (ret < 0) {
		pr_err("crypto_hash_init() failed\n");
		crypto_free_hash(tfm);
		goto out;
	}

	/* Server digest = MD5(id || secret || our challenge), per RFC 1994. */
	sg_init_one(&sg, (void *)&chap->id, 1);
	ret = crypto_hash_update(&desc, &sg, 1);
	if (ret < 0) {
		pr_err("crypto_hash_update() failed for id\n");
		crypto_free_hash(tfm);
		goto out;
	}

	sg_init_one(&sg, (void *)&auth->password, strlen(auth->password));
	ret = crypto_hash_update(&desc, &sg, strlen(auth->password));
	if (ret < 0) {
		pr_err("crypto_hash_update() failed for password\n");
		crypto_free_hash(tfm);
		goto out;
	}

	sg_init_one(&sg, (void *)chap->challenge, CHAP_CHALLENGE_LENGTH);
	ret = crypto_hash_update(&desc, &sg, CHAP_CHALLENGE_LENGTH);
	if (ret < 0) {
		pr_err("crypto_hash_update() failed for challenge\n");
		crypto_free_hash(tfm);
		goto out;
	}

	ret = crypto_hash_final(&desc, server_digest);
	if (ret < 0) {
		pr_err("crypto_hash_final() failed for server digest\n");
		crypto_free_hash(tfm);
		goto out;
	}
	crypto_free_hash(tfm);

	chap_binaryhex_to_asciihex(response, server_digest, MD5_SIGNATURE_SIZE);
	pr_debug("[server] MD5 Server Digest: %s\n", response);

	if (memcmp(server_digest, client_digest, MD5_SIGNATURE_SIZE) != 0) {
		pr_debug("[server] MD5 Digests do not match!\n\n");
		goto out;
	} else
		pr_debug("[server] MD5 Digests match, CHAP connetication"
				" successful.\n\n");
	/*
	 * One way authentication has succeeded, return now if mutual
	 * authentication is not enabled.
	 */
	if (!auth->authenticate_target) {
		kfree(challenge);
		kfree(challenge_binhex);
		return 0;
	}
	/*
	 * Get CHAP_I.
	 */
	if (extract_param(nr_in_ptr, "CHAP_I", 10, identifier, &type) < 0) {
		pr_err("Could not find CHAP_I.\n");
		goto out;
	}

	/* Skip a leading "0x" when the identifier arrived HEX-encoded. */
	if (type == HEX)
		id = (unsigned char)simple_strtoul((char *)&identifier[2],
					&endptr, 0);
	else
		id = (unsigned char)simple_strtoul(identifier, &endptr, 0);
	/*
	 * RFC 1994 says Identifier is no more than octet (8 bits).
	 */
	pr_debug("[server] Got CHAP_I=%d\n", id);
	/*
	 * Get CHAP_C.
	 */
	if (extract_param(nr_in_ptr, "CHAP_C", CHAP_CHALLENGE_STR_LEN,
			challenge, &type) < 0) {
		pr_err("Could not find CHAP_C.\n");
		goto out;
	}

	if (type != HEX) {
		pr_err("Could not find CHAP_C.\n");
		goto out;
	}
	pr_debug("[server] Got CHAP_C=%s\n", challenge);
	/*
	 * NOTE(review): challenge_len is not bounded against the
	 * challenge_binhex allocation beyond the buffers both being
	 * CHAP_CHALLENGE_STR_LEN bytes; hex decoding halves the length,
	 * so this fits, but an explicit check would be clearer.
	 */
	challenge_len = chap_string_to_hex(challenge_binhex, challenge,
				strlen(challenge));
	if (!challenge_len) {
		pr_err("Unable to convert incoming challenge\n");
		goto out;
	}
	/*
	 * Generate CHAP_N and CHAP_R for mutual authentication.
	 */
	tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm)) {
		pr_err("Unable to allocate struct crypto_hash\n");
		goto out;
	}
	desc.tfm = tfm;
	desc.flags = 0;

	ret = crypto_hash_init(&desc);
	if (ret < 0) {
		pr_err("crypto_hash_init() failed\n");
		crypto_free_hash(tfm);
		goto out;
	}

	/* Mutual digest = MD5(peer id || mutual secret || peer challenge). */
	sg_init_one(&sg, (void *)&id, 1);
	ret = crypto_hash_update(&desc, &sg, 1);
	if (ret < 0) {
		pr_err("crypto_hash_update() failed for id\n");
		crypto_free_hash(tfm);
		goto out;
	}

	sg_init_one(&sg, (void *)auth->password_mutual,
				strlen(auth->password_mutual));
	ret = crypto_hash_update(&desc, &sg, strlen(auth->password_mutual));
	if (ret < 0) {
		pr_err("crypto_hash_update() failed for"
				" password_mutual\n");
		crypto_free_hash(tfm);
		goto out;
	}
	/*
	 * Convert received challenge to binary hex.
	 */
	sg_init_one(&sg, (void *)challenge_binhex, challenge_len);
	ret = crypto_hash_update(&desc, &sg, challenge_len);
	if (ret < 0) {
		pr_err("crypto_hash_update() failed for ma challenge\n");
		crypto_free_hash(tfm);
		goto out;
	}

	ret = crypto_hash_final(&desc, digest);
	if (ret < 0) {
		pr_err("crypto_hash_final() failed for ma digest\n");
		crypto_free_hash(tfm);
		goto out;
	}
	crypto_free_hash(tfm);
	/*
	 * Generate CHAP_N and CHAP_R.
	 */
	*nr_out_len = sprintf(nr_out_ptr, "CHAP_N=%s", auth->userid_mutual);
	*nr_out_len += 1;
	pr_debug("[server] Sending CHAP_N=%s\n", auth->userid_mutual);
	/*
	 * Convert response from binary hex to ascii hex.
	 */
	chap_binaryhex_to_asciihex(response, digest, MD5_SIGNATURE_SIZE);
	*nr_out_len += sprintf(nr_out_ptr + *nr_out_len, "CHAP_R=0x%s",
			response);
	*nr_out_len += 1;
	pr_debug("[server] Sending CHAP_R=0x%s\n", response);
	auth_ret = 0;
out:
	kfree(challenge);
	kfree(challenge_binhex);
	return auth_ret;
}
+
+static int chap_got_response(
+       struct iscsi_conn *conn,
+       struct iscsi_node_auth *auth,
+       char *nr_in_ptr,
+       char *nr_out_ptr,
+       unsigned int *nr_out_len)
+{
+       struct iscsi_chap *chap = (struct iscsi_chap *) conn->auth_protocol;
+
+       switch (chap->digest_type) {
+       case CHAP_DIGEST_MD5:
+               if (chap_server_compute_md5(conn, auth, nr_in_ptr,
+                               nr_out_ptr, nr_out_len) < 0)
+                       return -1;
+               return 0;
+       default:
+               pr_err("Unknown CHAP digest type %d!\n",
+                               chap->digest_type);
+               return -1;
+       }
+}
+
/*
 * Top-level server-side CHAP state machine, invoked once per login
 * negotiation step.
 *
 * Return values: 0 = challenge sent, awaiting the initiator's response;
 * 1 = exchange complete (authentication succeeded); 2 = failure.
 */
u32 chap_main_loop(
	struct iscsi_conn *conn,
	struct iscsi_node_auth *auth,
	char *in_text,
	char *out_text,
	int *in_len,
	int *out_len)
{
	struct iscsi_chap *chap = (struct iscsi_chap *) conn->auth_protocol;

	if (!chap) {
		/* First pass: allocate state and send CHAP_A/I/C. */
		chap = chap_server_open(conn, auth, in_text, out_text, out_len);
		if (!chap)
			return 2;
		chap->chap_state = CHAP_STAGE_SERVER_AIC;
		return 0;
	} else if (chap->chap_state == CHAP_STAGE_SERVER_AIC) {
		/* Second pass: verify the initiator's CHAP_N/CHAP_R. */
		convert_null_to_semi(in_text, *in_len);
		if (chap_got_response(conn, auth, in_text, out_text,
				out_len) < 0) {
			chap_close(conn);
			return 2;
		}
		if (auth->authenticate_target)
			chap->chap_state = CHAP_STAGE_SERVER_NR;
		else
			*out_len = 0;
		chap_close(conn);
		return 1;
	}

	return 2;
}
diff --git a/drivers/target/iscsi/iscsi_target_auth.h b/drivers/target/iscsi/iscsi_target_auth.h
new file mode 100644 (file)
index 0000000..2f463c0
--- /dev/null
@@ -0,0 +1,31 @@
#ifndef _ISCSI_CHAP_H_
#define _ISCSI_CHAP_H_

/* CHAP algorithm identifiers, matching the iSCSI CHAP_A key values. */
#define CHAP_DIGEST_MD5		5
#define CHAP_DIGEST_SHA		6

#define CHAP_CHALLENGE_LENGTH	16
#define CHAP_CHALLENGE_STR_LEN	4096
#define MAX_RESPONSE_LENGTH	64	/* sufficient for MD5 */
#define	MAX_CHAP_N_SIZE		512

#define MD5_SIGNATURE_SIZE	16	/* 16 bytes in a MD5 message digest */

/* CHAP exchange states tracked in iscsi_chap.chap_state. */
#define CHAP_STAGE_CLIENT_A	1
#define CHAP_STAGE_SERVER_AIC	2
#define CHAP_STAGE_CLIENT_NR	3
#define CHAP_STAGE_CLIENT_NRIC	4
#define CHAP_STAGE_SERVER_NR	5

extern u32 chap_main_loop(struct iscsi_conn *, struct iscsi_node_auth *, char *, char *,
				int *, int *);

/* Per-connection CHAP negotiation state, hung off conn->auth_protocol. */
struct iscsi_chap {
	unsigned char	digest_type;	/* CHAP_DIGEST_* */
	unsigned char	id;		/* identifier sent as CHAP_I */
	unsigned char	challenge[CHAP_CHALLENGE_LENGTH];	/* raw challenge bytes */
	unsigned int	authenticate_target;
	unsigned int	chap_state;	/* CHAP_STAGE_* */
} ____cacheline_aligned;

#endif   /*** _ISCSI_CHAP_H_ ***/
diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c
new file mode 100644 (file)
index 0000000..32bb92c
--- /dev/null
@@ -0,0 +1,1882 @@
+/*******************************************************************************
+ * This file contains the configfs implementation for iSCSI Target mode
+ * from the LIO-Target Project.
+ *
+ * (c) Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ****************************************************************************/
+
+#include <linux/configfs.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+#include <target/target_core_fabric_ops.h>
+#include <target/target_core_fabric_configfs.h>
+#include <target/target_core_fabric_lib.h>
+#include <target/target_core_device.h>
+#include <target/target_core_tpg.h>
+#include <target/target_core_configfs.h>
+#include <target/configfs_macros.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_parameters.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_nodeattrib.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+#include "iscsi_target_stat.h"
+#include "iscsi_target_configfs.h"
+
+/* Fabric registration handle for LIO-Target, set up at module init */
+struct target_fabric_configfs *lio_target_fabric_configfs;
+
+/* Generic wrapper pairing a configfs attribute with show/store callbacks */
+struct lio_target_configfs_attribute {
+       struct configfs_attribute attr;
+       ssize_t (*show)(void *, char *);
+       ssize_t (*store)(void *, const char *, size_t);
+};
+
+/*
+ * Resolve a configfs item back to its iscsi_portal_group and report the
+ * owning tiqn through *tiqn_out.  On success a TPG reference is held via
+ * iscsit_get_tpg(); the caller must drop it with iscsit_put_tpg().
+ * Returns NULL on failure.
+ */
+struct iscsi_portal_group *lio_get_tpg_from_tpg_item(
+       struct config_item *item,
+       struct iscsi_tiqn **tiqn_out)
+{
+       struct se_portal_group *se_tpg = container_of(to_config_group(item),
+                                       struct se_portal_group, tpg_group);
+       struct iscsi_portal_group *tpg =
+                       (struct iscsi_portal_group *)se_tpg->se_tpg_fabric_ptr;
+       int ret;
+
+       if (!tpg) {
+               pr_err("Unable to locate struct iscsi_portal_group "
+                       "pointer\n");
+               return NULL;
+       }
+       ret = iscsit_get_tpg(tpg);
+       if (ret < 0)
+               return NULL;
+
+       *tiqn_out = tpg->tpg_tiqn;
+       return tpg;
+}
+
+/* Start items for lio_target_portal_cit */
+
+/* Show "1" if this portal has an SCTP child network portal, else "0" */
+static ssize_t lio_target_np_show_sctp(
+       struct se_tpg_np *se_tpg_np,
+       char *page)
+{
+       struct iscsi_tpg_np *tpg_np = container_of(se_tpg_np,
+                               struct iscsi_tpg_np, se_tpg_np);
+       struct iscsi_tpg_np *tpg_np_sctp;
+       ssize_t rb;
+
+       tpg_np_sctp = iscsit_tpg_locate_child_np(tpg_np, ISCSI_SCTP_TCP);
+       if (tpg_np_sctp)
+               rb = sprintf(page, "1\n");
+       else
+               rb = sprintf(page, "0\n");
+
+       return rb;
+}
+
+/*
+ * Store handler for the per-portal "sctp" attribute.  Writing "1" adds an
+ * SCTP child network portal reusing the existing np->np_sockaddr; writing
+ * "0" removes it.  Returns count on success, -EINVAL otherwise.
+ */
+static ssize_t lio_target_np_store_sctp(
+       struct se_tpg_np *se_tpg_np,
+       const char *page,
+       size_t count)
+{
+       struct iscsi_np *np;
+       struct iscsi_portal_group *tpg;
+       struct iscsi_tpg_np *tpg_np = container_of(se_tpg_np,
+                               struct iscsi_tpg_np, se_tpg_np);
+       struct iscsi_tpg_np *tpg_np_sctp = NULL;
+       char *endptr;
+       u32 op;
+       int ret;
+
+       /* NOTE(review): simple_strtoul is legacy; kstrtou32 would reject
+        * trailing garbage — confirm against tree-wide convention. */
+       op = simple_strtoul(page, &endptr, 0);
+       if ((op != 1) && (op != 0)) {
+               pr_err("Illegal value for tpg_enable: %u\n", op);
+               return -EINVAL;
+       }
+       np = tpg_np->tpg_np;
+       if (!np) {
+               pr_err("Unable to locate struct iscsi_np from"
+                               " struct iscsi_tpg_np\n");
+               return -EINVAL;
+       }
+
+       tpg = tpg_np->tpg;
+       if (iscsit_get_tpg(tpg) < 0)
+               return -EINVAL;
+
+       if (op) {
+               /*
+                * Use existing np->np_sockaddr for SCTP network portal reference
+                */
+               tpg_np_sctp = iscsit_tpg_add_network_portal(tpg, &np->np_sockaddr,
+                                       np->np_ip, tpg_np, ISCSI_SCTP_TCP);
+               if (!tpg_np_sctp || IS_ERR(tpg_np_sctp))
+                       goto out;
+       } else {
+               tpg_np_sctp = iscsit_tpg_locate_child_np(tpg_np, ISCSI_SCTP_TCP);
+               if (!tpg_np_sctp)
+                       goto out;
+
+               ret = iscsit_tpg_del_network_portal(tpg, tpg_np_sctp);
+               if (ret < 0)
+                       goto out;
+       }
+
+       iscsit_put_tpg(tpg);
+       return count;
+out:
+       iscsit_put_tpg(tpg);
+       return -EINVAL;
+}
+
+TF_NP_BASE_ATTR(lio_target, sctp, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *lio_target_portal_attrs[] = {
+       &lio_target_np_sctp.attr,
+       NULL,
+};
+
+/* Stop items for lio_target_portal_cit */
+
+/* Start items for lio_target_np_cit */
+
+#define MAX_PORTAL_LEN         256
+
+/*
+ * configfs mkdir callback under .../tpgt_N/np/.  Parses "name" as either
+ * "a.b.c.d:port" (IPv4) or "[addr]:port" (IPv6, detected by a leading
+ * "[") and registers a new ISCSI_TCP network portal for the TPG.
+ * Returns the new portal's se_tpg_np, or ERR_PTR() on failure.
+ */
+struct se_tpg_np *lio_target_call_addnptotpg(
+       struct se_portal_group *se_tpg,
+       struct config_group *group,
+       const char *name)
+{
+       struct iscsi_portal_group *tpg;
+       struct iscsi_tpg_np *tpg_np;
+       char *str, *str2, *ip_str, *port_str;
+       struct __kernel_sockaddr_storage sockaddr;
+       struct sockaddr_in *sock_in;
+       struct sockaddr_in6 *sock_in6;
+       unsigned long port;
+       int ret;
+       char buf[MAX_PORTAL_LEN + 1];
+
+       if (strlen(name) > MAX_PORTAL_LEN) {
+               pr_err("strlen(name): %d exceeds MAX_PORTAL_LEN: %d\n",
+                       (int)strlen(name), MAX_PORTAL_LEN);
+               return ERR_PTR(-EOVERFLOW);
+       }
+       /* Work on a local, NUL-terminated copy so we can carve it up */
+       memset(buf, 0, MAX_PORTAL_LEN + 1);
+       snprintf(buf, MAX_PORTAL_LEN, "%s", name);
+
+       memset(&sockaddr, 0, sizeof(struct __kernel_sockaddr_storage));
+
+       str = strstr(buf, "[");
+       if (str) {
+               /* IPv6 form: "[addr]:port" */
+               const char *end;
+
+               str2 = strstr(str, "]");
+               if (!str2) {
+                       pr_err("Unable to locate trailing \"]\""
+                               " in IPv6 iSCSI network portal address\n");
+                       return ERR_PTR(-EINVAL);
+               }
+               str++; /* Skip over leading "[" */
+               *str2 = '\0'; /* Terminate the IPv6 address */
+               str2++; /* Skip over the "]" */
+               port_str = strstr(str2, ":");
+               if (!port_str) {
+                       pr_err("Unable to locate \":port\""
+                               " in IPv6 iSCSI network portal address\n");
+                       return ERR_PTR(-EINVAL);
+               }
+               *port_str = '\0'; /* Terminate string for IP */
+               port_str++; /* Skip over ":" */
+
+               ret = strict_strtoul(port_str, 0, &port);
+               if (ret < 0) {
+                       pr_err("strict_strtoul() failed for port_str: %d\n", ret);
+                       return ERR_PTR(ret);
+               }
+               sock_in6 = (struct sockaddr_in6 *)&sockaddr;
+               sock_in6->sin6_family = AF_INET6;
+               sock_in6->sin6_port = htons((unsigned short)port);
+               ret = in6_pton(str, IPV6_ADDRESS_SPACE,
+                               (void *)&sock_in6->sin6_addr.in6_u, -1, &end);
+               if (ret <= 0) {
+                       pr_err("in6_pton returned: %d\n", ret);
+                       return ERR_PTR(-EINVAL);
+               }
+       } else {
+               /* IPv4 form: "a.b.c.d:port" */
+               str = ip_str = &buf[0];
+               port_str = strstr(ip_str, ":");
+               if (!port_str) {
+                       pr_err("Unable to locate \":port\""
+                               " in IPv4 iSCSI network portal address\n");
+                       return ERR_PTR(-EINVAL);
+               }
+               *port_str = '\0'; /* Terminate string for IP */
+               port_str++; /* Skip over ":" */
+
+               ret = strict_strtoul(port_str, 0, &port);
+               if (ret < 0) {
+                       pr_err("strict_strtoul() failed for port_str: %d\n", ret);
+                       return ERR_PTR(ret);
+               }
+               sock_in = (struct sockaddr_in *)&sockaddr;
+               sock_in->sin_family = AF_INET;
+               sock_in->sin_port = htons((unsigned short)port);
+               sock_in->sin_addr.s_addr = in_aton(ip_str);
+       }
+       tpg = container_of(se_tpg, struct iscsi_portal_group, tpg_se_tpg);
+       ret = iscsit_get_tpg(tpg);
+       if (ret < 0)
+               return ERR_PTR(-EINVAL);
+
+       pr_debug("LIO_Target_ConfigFS: REGISTER -> %s TPGT: %hu"
+               " PORTAL: %s\n",
+               config_item_name(&se_tpg->se_tpg_wwn->wwn_group.cg_item),
+               tpg->tpgt, name);
+       /*
+        * Assume ISCSI_TCP by default.  Other network portals for other
+        * iSCSI fabrics:
+        *
+        * Traditional iSCSI over SCTP (initial support)
+        * iSER/TCP (TODO, hardware available)
+        * iSER/SCTP (TODO, software emulation with osc-iwarp)
+        * iSER/IB (TODO, hardware available)
+        *
+        * can be enabled with atributes under
+        * sys/kernel/config/iscsi/$IQN/$TPG/np/$IP:$PORT/
+        *
+        */
+       tpg_np = iscsit_tpg_add_network_portal(tpg, &sockaddr, str, NULL,
+                               ISCSI_TCP);
+       if (IS_ERR(tpg_np)) {
+               iscsit_put_tpg(tpg);
+               return ERR_PTR(PTR_ERR(tpg_np));
+       }
+       pr_debug("LIO_Target_ConfigFS: addnptotpg done!\n");
+
+       iscsit_put_tpg(tpg);
+       return &tpg_np->se_tpg_np;
+}
+
+/* configfs rmdir callback: deregister and tear down a TPG network portal */
+static void lio_target_call_delnpfromtpg(
+       struct se_tpg_np *se_tpg_np)
+{
+       struct iscsi_portal_group *tpg;
+       struct iscsi_tpg_np *tpg_np;
+       struct se_portal_group *se_tpg;
+       int ret;
+
+       tpg_np = container_of(se_tpg_np, struct iscsi_tpg_np, se_tpg_np);
+       tpg = tpg_np->tpg;
+       ret = iscsit_get_tpg(tpg);
+       if (ret < 0)
+               return;
+
+       se_tpg = &tpg->tpg_se_tpg;
+       pr_debug("LIO_Target_ConfigFS: DEREGISTER -> %s TPGT: %hu"
+               " PORTAL: %s:%hu\n", config_item_name(&se_tpg->se_tpg_wwn->wwn_group.cg_item),
+               tpg->tpgt, tpg_np->tpg_np->np_ip, tpg_np->tpg_np->np_port);
+
+       ret = iscsit_tpg_del_network_portal(tpg, tpg_np);
+       if (ret < 0)
+               goto out;
+
+       pr_debug("LIO_Target_ConfigFS: delnpfromtpg done!\n");
+out:
+       iscsit_put_tpg(tpg);
+}
+
+/* End items for lio_target_np_cit */
+
+/* Start items for lio_target_nacl_attrib_cit */
+
+/*
+ * Generate show/store handlers for a per-initiator node attribute
+ * <name>; store parses a u32 and applies it via iscsit_na_<name>().
+ */
+#define DEF_NACL_ATTRIB(name)                                          \
+static ssize_t iscsi_nacl_attrib_show_##name(                          \
+       struct se_node_acl *se_nacl,                                    \
+       char *page)                                                     \
+{                                                                      \
+       struct iscsi_node_acl *nacl = container_of(se_nacl, struct iscsi_node_acl, \
+                                       se_node_acl);                   \
+                                                                       \
+       return sprintf(page, "%u\n", ISCSI_NODE_ATTRIB(nacl)->name);    \
+}                                                                      \
+                                                                       \
+static ssize_t iscsi_nacl_attrib_store_##name(                         \
+       struct se_node_acl *se_nacl,                                    \
+       const char *page,                                               \
+       size_t count)                                                   \
+{                                                                      \
+       struct iscsi_node_acl *nacl = container_of(se_nacl, struct iscsi_node_acl, \
+                                       se_node_acl);                   \
+       char *endptr;                                                   \
+       u32 val;                                                        \
+       int ret;                                                        \
+                                                                       \
+       val = simple_strtoul(page, &endptr, 0);                         \
+       ret = iscsit_na_##name(nacl, val);                              \
+       if (ret < 0)                                                    \
+               return ret;                                             \
+                                                                       \
+       return count;                                                   \
+}
+
+#define NACL_ATTR(_name, _mode) TF_NACL_ATTRIB_ATTR(iscsi, _name, _mode);
+/*
+ * Define iscsi_node_attrib_s_dataout_timeout
+ */
+DEF_NACL_ATTRIB(dataout_timeout);
+NACL_ATTR(dataout_timeout, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_node_attrib_s_dataout_timeout_retries
+ */
+DEF_NACL_ATTRIB(dataout_timeout_retries);
+NACL_ATTR(dataout_timeout_retries, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_node_attrib_s_default_erl
+ */
+DEF_NACL_ATTRIB(default_erl);
+NACL_ATTR(default_erl, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_node_attrib_s_nopin_timeout
+ */
+DEF_NACL_ATTRIB(nopin_timeout);
+NACL_ATTR(nopin_timeout, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_node_attrib_s_nopin_response_timeout
+ */
+DEF_NACL_ATTRIB(nopin_response_timeout);
+NACL_ATTR(nopin_response_timeout, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_node_attrib_s_random_datain_pdu_offsets
+ */
+DEF_NACL_ATTRIB(random_datain_pdu_offsets);
+NACL_ATTR(random_datain_pdu_offsets, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_node_attrib_s_random_datain_seq_offsets
+ */
+DEF_NACL_ATTRIB(random_datain_seq_offsets);
+NACL_ATTR(random_datain_seq_offsets, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_node_attrib_s_random_r2t_offsets
+ */
+DEF_NACL_ATTRIB(random_r2t_offsets);
+NACL_ATTR(random_r2t_offsets, S_IRUGO | S_IWUSR);
+
+/* Attribute table for the per-initiator "attrib" configfs group */
+static struct configfs_attribute *lio_target_nacl_attrib_attrs[] = {
+       &iscsi_nacl_attrib_dataout_timeout.attr,
+       &iscsi_nacl_attrib_dataout_timeout_retries.attr,
+       &iscsi_nacl_attrib_default_erl.attr,
+       &iscsi_nacl_attrib_nopin_timeout.attr,
+       &iscsi_nacl_attrib_nopin_response_timeout.attr,
+       &iscsi_nacl_attrib_random_datain_pdu_offsets.attr,
+       &iscsi_nacl_attrib_random_datain_seq_offsets.attr,
+       &iscsi_nacl_attrib_random_r2t_offsets.attr,
+       NULL,
+};
+
+/* End items for lio_target_nacl_attrib_cit */
+
+/* Start items for lio_target_nacl_auth_cit */
+
+/*
+ * Generate show/store for a CHAP credential string field.  Writing the
+ * literal "NULL" clears the corresponding naf_flags bit, anything else
+ * sets it; when both mutual (IN) userid and password are set, target
+ * (mutual) authentication is enabled.  CAP_SYS_ADMIN is required for
+ * both read and write.
+ */
+#define __DEF_NACL_AUTH_STR(prefix, name, flags)                       \
+static ssize_t __iscsi_##prefix##_show_##name(                         \
+       struct iscsi_node_acl *nacl,                                    \
+       char *page)                                                     \
+{                                                                      \
+       struct iscsi_node_auth *auth = &nacl->node_auth;                \
+                                                                       \
+       if (!capable(CAP_SYS_ADMIN))                                    \
+               return -EPERM;                                          \
+       return snprintf(page, PAGE_SIZE, "%s\n", auth->name);           \
+}                                                                      \
+                                                                       \
+static ssize_t __iscsi_##prefix##_store_##name(                                \
+       struct iscsi_node_acl *nacl,                                    \
+       const char *page,                                               \
+       size_t count)                                                   \
+{                                                                      \
+       struct iscsi_node_auth *auth = &nacl->node_auth;                \
+                                                                       \
+       if (!capable(CAP_SYS_ADMIN))                                    \
+               return -EPERM;                                          \
+                                                                       \
+       snprintf(auth->name, PAGE_SIZE, "%s", page);                    \
+       if (!strncmp("NULL", auth->name, 4))                            \
+               auth->naf_flags &= ~flags;                              \
+       else                                                            \
+               auth->naf_flags |= flags;                               \
+                                                                       \
+       if ((auth->naf_flags & NAF_USERID_IN_SET) &&                    \
+           (auth->naf_flags & NAF_PASSWORD_IN_SET))                    \
+               auth->authenticate_target = 1;                          \
+       else                                                            \
+               auth->authenticate_target = 0;                          \
+                                                                       \
+       return count;                                                   \
+}
+
+/* Generate a CAP_SYS_ADMIN-gated read-only show for an integer auth field */
+#define __DEF_NACL_AUTH_INT(prefix, name)                              \
+static ssize_t __iscsi_##prefix##_show_##name(                         \
+       struct iscsi_node_acl *nacl,                                    \
+       char *page)                                                     \
+{                                                                      \
+       struct iscsi_node_auth *auth = &nacl->node_auth;                \
+                                                                       \
+       if (!capable(CAP_SYS_ADMIN))                                    \
+               return -EPERM;                                          \
+                                                                       \
+       return snprintf(page, PAGE_SIZE, "%d\n", auth->name);           \
+}
+
+/* se_node_acl -> iscsi_node_acl wrappers around the __DEF_* handlers */
+#define DEF_NACL_AUTH_STR(name, flags)                                 \
+       __DEF_NACL_AUTH_STR(nacl_auth, name, flags)                     \
+static ssize_t iscsi_nacl_auth_show_##name(                            \
+       struct se_node_acl *nacl,                                       \
+       char *page)                                                     \
+{                                                                      \
+       return __iscsi_nacl_auth_show_##name(container_of(nacl,         \
+                       struct iscsi_node_acl, se_node_acl), page);             \
+}                                                                      \
+static ssize_t iscsi_nacl_auth_store_##name(                           \
+       struct se_node_acl *nacl,                                       \
+       const char *page,                                               \
+       size_t count)                                                   \
+{                                                                      \
+       return __iscsi_nacl_auth_store_##name(container_of(nacl,        \
+                       struct iscsi_node_acl, se_node_acl), page, count);      \
+}
+
+#define DEF_NACL_AUTH_INT(name)                                                \
+       __DEF_NACL_AUTH_INT(nacl_auth, name)                            \
+static ssize_t iscsi_nacl_auth_show_##name(                            \
+       struct se_node_acl *nacl,                                       \
+       char *page)                                                     \
+{                                                                      \
+       return __iscsi_nacl_auth_show_##name(container_of(nacl,         \
+                       struct iscsi_node_acl, se_node_acl), page);             \
+}
+
+#define AUTH_ATTR(_name, _mode)        TF_NACL_AUTH_ATTR(iscsi, _name, _mode);
+#define AUTH_ATTR_RO(_name) TF_NACL_AUTH_ATTR_RO(iscsi, _name);
+
+/*
+ * One-way authentication userid
+ */
+DEF_NACL_AUTH_STR(userid, NAF_USERID_SET);
+AUTH_ATTR(userid, S_IRUGO | S_IWUSR);
+/*
+ * One-way authentication password
+ */
+DEF_NACL_AUTH_STR(password, NAF_PASSWORD_SET);
+AUTH_ATTR(password, S_IRUGO | S_IWUSR);
+/*
+ * Enforce mutual authentication
+ */
+DEF_NACL_AUTH_INT(authenticate_target);
+AUTH_ATTR_RO(authenticate_target);
+/*
+ * Mutual authentication userid
+ */
+DEF_NACL_AUTH_STR(userid_mutual, NAF_USERID_IN_SET);
+AUTH_ATTR(userid_mutual, S_IRUGO | S_IWUSR);
+/*
+ * Mutual authentication password
+ */
+DEF_NACL_AUTH_STR(password_mutual, NAF_PASSWORD_IN_SET);
+AUTH_ATTR(password_mutual, S_IRUGO | S_IWUSR);
+
+/* Attribute table for the per-initiator "auth" configfs group */
+static struct configfs_attribute *lio_target_nacl_auth_attrs[] = {
+       &iscsi_nacl_auth_userid.attr,
+       &iscsi_nacl_auth_password.attr,
+       &iscsi_nacl_auth_authenticate_target.attr,
+       &iscsi_nacl_auth_userid_mutual.attr,
+       &iscsi_nacl_auth_password_mutual.attr,
+       NULL,
+};
+
+/* End items for lio_target_nacl_auth_cit */
+
+/* Start items for lio_target_nacl_param_cit */
+
+/*
+ * Generate a read-only show handler reporting the negotiated value of
+ * iSCSI session parameter <name> for this ACL's active session, or a
+ * "No Active iSCSI Session" message when none is logged in.
+ */
+#define DEF_NACL_PARAM(name)                                           \
+static ssize_t iscsi_nacl_param_show_##name(                           \
+       struct se_node_acl *se_nacl,                                    \
+       char *page)                                                     \
+{                                                                      \
+       struct iscsi_session *sess;                                     \
+       struct se_session *se_sess;                                     \
+       ssize_t rb;                                                     \
+                                                                       \
+       spin_lock_bh(&se_nacl->nacl_sess_lock);                         \
+       se_sess = se_nacl->nacl_sess;                                   \
+       if (!se_sess) {                                                 \
+               rb = snprintf(page, PAGE_SIZE,                          \
+                       "No Active iSCSI Session\n");                   \
+       } else {                                                        \
+               sess = se_sess->fabric_sess_ptr;                        \
+               rb = snprintf(page, PAGE_SIZE, "%u\n",                  \
+                       (u32)sess->sess_ops->name);                     \
+       }                                                               \
+       spin_unlock_bh(&se_nacl->nacl_sess_lock);                       \
+                                                                       \
+       return rb;                                                      \
+}
+
+#define NACL_PARAM_ATTR(_name) TF_NACL_PARAM_ATTR_RO(iscsi, _name);
+
+DEF_NACL_PARAM(MaxConnections);
+NACL_PARAM_ATTR(MaxConnections);
+
+DEF_NACL_PARAM(InitialR2T);
+NACL_PARAM_ATTR(InitialR2T);
+
+DEF_NACL_PARAM(ImmediateData);
+NACL_PARAM_ATTR(ImmediateData);
+
+DEF_NACL_PARAM(MaxBurstLength);
+NACL_PARAM_ATTR(MaxBurstLength);
+
+DEF_NACL_PARAM(FirstBurstLength);
+NACL_PARAM_ATTR(FirstBurstLength);
+
+DEF_NACL_PARAM(DefaultTime2Wait);
+NACL_PARAM_ATTR(DefaultTime2Wait);
+
+DEF_NACL_PARAM(DefaultTime2Retain);
+NACL_PARAM_ATTR(DefaultTime2Retain);
+
+DEF_NACL_PARAM(MaxOutstandingR2T);
+NACL_PARAM_ATTR(MaxOutstandingR2T);
+
+DEF_NACL_PARAM(DataPDUInOrder);
+NACL_PARAM_ATTR(DataPDUInOrder);
+
+DEF_NACL_PARAM(DataSequenceInOrder);
+NACL_PARAM_ATTR(DataSequenceInOrder);
+
+DEF_NACL_PARAM(ErrorRecoveryLevel);
+NACL_PARAM_ATTR(ErrorRecoveryLevel);
+
+/* Attribute table for the per-initiator "param" configfs group */
+static struct configfs_attribute *lio_target_nacl_param_attrs[] = {
+       &iscsi_nacl_param_MaxConnections.attr,
+       &iscsi_nacl_param_InitialR2T.attr,
+       &iscsi_nacl_param_ImmediateData.attr,
+       &iscsi_nacl_param_MaxBurstLength.attr,
+       &iscsi_nacl_param_FirstBurstLength.attr,
+       &iscsi_nacl_param_DefaultTime2Wait.attr,
+       &iscsi_nacl_param_DefaultTime2Retain.attr,
+       &iscsi_nacl_param_MaxOutstandingR2T.attr,
+       &iscsi_nacl_param_DataPDUInOrder.attr,
+       &iscsi_nacl_param_DataSequenceInOrder.attr,
+       &iscsi_nacl_param_ErrorRecoveryLevel.attr,
+       NULL,
+};
+
+/* End items for lio_target_nacl_param_cit */
+
+/* Start items for lio_target_acl_cit */
+
+/*
+ * Show handler for the ACL's "info" attribute: dump session identity,
+ * session state, CmdSN window values and per-connection state for this
+ * initiator's active session (if any) into "page".
+ * NOTE(review): the many sprintf(page+rb, ...) calls assume the total
+ * output stays under one page — verify against PAGE_SIZE.
+ */
+static ssize_t lio_target_nacl_show_info(
+       struct se_node_acl *se_nacl,
+       char *page)
+{
+       struct iscsi_session *sess;
+       struct iscsi_conn *conn;
+       struct se_session *se_sess;
+       ssize_t rb = 0;
+
+       spin_lock_bh(&se_nacl->nacl_sess_lock);
+       se_sess = se_nacl->nacl_sess;
+       if (!se_sess) {
+               rb += sprintf(page+rb, "No active iSCSI Session for Initiator"
+                       " Endpoint: %s\n", se_nacl->initiatorname);
+       } else {
+               sess = se_sess->fabric_sess_ptr;
+
+               if (sess->sess_ops->InitiatorName)
+                       rb += sprintf(page+rb, "InitiatorName: %s\n",
+                               sess->sess_ops->InitiatorName);
+               if (sess->sess_ops->InitiatorAlias)
+                       rb += sprintf(page+rb, "InitiatorAlias: %s\n",
+                               sess->sess_ops->InitiatorAlias);
+
+               rb += sprintf(page+rb, "LIO Session ID: %u   "
+                       "ISID: 0x%02x %02x %02x %02x %02x %02x  "
+                       "TSIH: %hu  ", sess->sid,
+                       sess->isid[0], sess->isid[1], sess->isid[2],
+                       sess->isid[3], sess->isid[4], sess->isid[5],
+                       sess->tsih);
+               rb += sprintf(page+rb, "SessionType: %s\n",
+                               (sess->sess_ops->SessionType) ?
+                               "Discovery" : "Normal");
+               rb += sprintf(page+rb, "Session State: ");
+               switch (sess->session_state) {
+               case TARG_SESS_STATE_FREE:
+                       rb += sprintf(page+rb, "TARG_SESS_FREE\n");
+                       break;
+               case TARG_SESS_STATE_ACTIVE:
+                       rb += sprintf(page+rb, "TARG_SESS_STATE_ACTIVE\n");
+                       break;
+               case TARG_SESS_STATE_LOGGED_IN:
+                       rb += sprintf(page+rb, "TARG_SESS_STATE_LOGGED_IN\n");
+                       break;
+               case TARG_SESS_STATE_FAILED:
+                       rb += sprintf(page+rb, "TARG_SESS_STATE_FAILED\n");
+                       break;
+               case TARG_SESS_STATE_IN_CONTINUE:
+                       rb += sprintf(page+rb, "TARG_SESS_STATE_IN_CONTINUE\n");
+                       break;
+               default:
+                       rb += sprintf(page+rb, "ERROR: Unknown Session"
+                                       " State!\n");
+                       break;
+               }
+
+               rb += sprintf(page+rb, "---------------------[iSCSI Session"
+                               " Values]-----------------------\n");
+               rb += sprintf(page+rb, "  CmdSN/WR  :  CmdSN/WC  :  ExpCmdSN"
+                               "  :  MaxCmdSN  :     ITT    :     TTT\n");
+               rb += sprintf(page+rb, " 0x%08x   0x%08x   0x%08x   0x%08x"
+                               "   0x%08x   0x%08x\n",
+                       sess->cmdsn_window,
+                       (sess->max_cmd_sn - sess->exp_cmd_sn) + 1,
+                       sess->exp_cmd_sn, sess->max_cmd_sn,
+                       sess->init_task_tag, sess->targ_xfer_tag);
+               rb += sprintf(page+rb, "----------------------[iSCSI"
+                               " Connections]-------------------------\n");
+
+               spin_lock(&sess->conn_lock);
+               list_for_each_entry(conn, &sess->sess_conn_list, conn_list) {
+                       rb += sprintf(page+rb, "CID: %hu  Connection"
+                                       " State: ", conn->cid);
+                       switch (conn->conn_state) {
+                       case TARG_CONN_STATE_FREE:
+                               rb += sprintf(page+rb,
+                                       "TARG_CONN_STATE_FREE\n");
+                               break;
+                       case TARG_CONN_STATE_XPT_UP:
+                               rb += sprintf(page+rb,
+                                       "TARG_CONN_STATE_XPT_UP\n");
+                               break;
+                       case TARG_CONN_STATE_IN_LOGIN:
+                               rb += sprintf(page+rb,
+                                       "TARG_CONN_STATE_IN_LOGIN\n");
+                               break;
+                       case TARG_CONN_STATE_LOGGED_IN:
+                               rb += sprintf(page+rb,
+                                       "TARG_CONN_STATE_LOGGED_IN\n");
+                               break;
+                       case TARG_CONN_STATE_IN_LOGOUT:
+                               rb += sprintf(page+rb,
+                                       "TARG_CONN_STATE_IN_LOGOUT\n");
+                               break;
+                       case TARG_CONN_STATE_LOGOUT_REQUESTED:
+                               rb += sprintf(page+rb,
+                                       "TARG_CONN_STATE_LOGOUT_REQUESTED\n");
+                               break;
+                       case TARG_CONN_STATE_CLEANUP_WAIT:
+                               rb += sprintf(page+rb,
+                                       "TARG_CONN_STATE_CLEANUP_WAIT\n");
+                               break;
+                       default:
+                               rb += sprintf(page+rb,
+                                       "ERROR: Unknown Connection State!\n");
+                               break;
+                       }
+
+                       rb += sprintf(page+rb, "   Address %s %s", conn->login_ip,
+                               (conn->network_transport == ISCSI_TCP) ?
+                               "TCP" : "SCTP");
+                       rb += sprintf(page+rb, "  StatSN: 0x%08x\n",
+                               conn->stat_sn);
+               }
+               spin_unlock(&sess->conn_lock);
+       }
+       spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+       return rb;
+}
+
+/* Show the ACL's current CmdSN queue depth */
+static ssize_t lio_target_nacl_show_cmdsn_depth(
+       struct se_node_acl *se_nacl,
+       char *page)
+{
+       return sprintf(page, "%u\n", se_nacl->queue_depth);
+}
+
+/*
+ * Store handler for the ACL's "cmdsn_depth" attribute: validate the new
+ * depth against TA_DEFAULT_CMDSN_DEPTH_MAX and apply it (force=1) via
+ * iscsit_tpg_set_initiator_node_queue_depth().
+ */
+static ssize_t lio_target_nacl_store_cmdsn_depth(
+       struct se_node_acl *se_nacl,
+       const char *page,
+       size_t count)
+{
+       struct se_portal_group *se_tpg = se_nacl->se_tpg;
+       struct iscsi_portal_group *tpg = container_of(se_tpg,
+                       struct iscsi_portal_group, tpg_se_tpg);
+       struct config_item *acl_ci, *tpg_ci, *wwn_ci;
+       char *endptr;
+       u32 cmdsn_depth = 0;
+       int ret;
+
+       cmdsn_depth = simple_strtoul(page, &endptr, 0);
+       if (cmdsn_depth > TA_DEFAULT_CMDSN_DEPTH_MAX) {
+               pr_err("Passed cmdsn_depth: %u exceeds"
+                       " TA_DEFAULT_CMDSN_DEPTH_MAX: %u\n", cmdsn_depth,
+                       TA_DEFAULT_CMDSN_DEPTH_MAX);
+               return -EINVAL;
+       }
+       /* Defensive lookups of the enclosing configfs hierarchy items */
+       acl_ci = &se_nacl->acl_group.cg_item;
+       if (!acl_ci) {
+               pr_err("Unable to locate acl_ci\n");
+               return -EINVAL;
+       }
+       tpg_ci = &acl_ci->ci_parent->ci_group->cg_item;
+       if (!tpg_ci) {
+               pr_err("Unable to locate tpg_ci\n");
+               return -EINVAL;
+       }
+       wwn_ci = &tpg_ci->ci_group->cg_item;
+       if (!wwn_ci) {
+               pr_err("Unable to locate config_item wwn_ci\n");
+               return -EINVAL;
+       }
+
+       if (iscsit_get_tpg(tpg) < 0)
+               return -EINVAL;
+       /*
+        * iscsit_tpg_set_initiator_node_queue_depth() assumes force=1
+        */
+       ret = iscsit_tpg_set_initiator_node_queue_depth(tpg,
+                               config_item_name(acl_ci), cmdsn_depth, 1);
+
+       pr_debug("LIO_Target_ConfigFS: %s/%s Set CmdSN Window: %u for "
+               "InitiatorName: %s\n", config_item_name(wwn_ci),
+               config_item_name(tpg_ci), cmdsn_depth,
+               config_item_name(acl_ci));
+
+       iscsit_put_tpg(tpg);
+       return (!ret) ? count : (ssize_t)ret;
+}
+
+TF_NACL_BASE_ATTR(lio_target, cmdsn_depth, S_IRUGO | S_IWUSR);
+
+/* NULL-terminated attribute table for the per-initiator configfs dir. */
+static struct configfs_attribute *lio_target_initiator_attrs[] = {
+       &lio_target_nacl_info.attr,
+       &lio_target_nacl_cmdsn_depth.attr,
+       NULL,
+};
+
+/*
+ * Allocate a zeroed iscsi_node_acl and hand back the embedded generic
+ * se_node_acl for the target core.  Returns NULL on allocation failure.
+ */
+static struct se_node_acl *lio_tpg_alloc_fabric_acl(
+       struct se_portal_group *se_tpg)
+{
+       struct iscsi_node_acl *acl;
+
+       acl = kzalloc(sizeof(struct iscsi_node_acl), GFP_KERNEL);
+       if (acl)
+               return &acl->se_node_acl;
+
+       pr_err("Unable to allocate memory for struct iscsi_node_acl\n");
+       return NULL;
+}
+
+/*
+ * configfs mkdir: create an initiator NodeACL under a TPG and attach the
+ * per-ACL session-statistics default group.  Returns the new se_node_acl
+ * or an ERR_PTR on failure.
+ */
+static struct se_node_acl *lio_target_make_nodeacl(
+       struct se_portal_group *se_tpg,
+       struct config_group *group,
+       const char *name)
+{
+       struct config_group *stats_cg;
+       struct iscsi_node_acl *acl;
+       struct se_node_acl *se_nacl_new, *se_nacl;
+       struct iscsi_portal_group *tpg = container_of(se_tpg,
+                       struct iscsi_portal_group, tpg_se_tpg);
+       u32 cmdsn_depth;
+
+       se_nacl_new = lio_tpg_alloc_fabric_acl(se_tpg);
+       if (!se_nacl_new)
+               return ERR_PTR(-ENOMEM);
+
+       acl = container_of(se_nacl_new, struct iscsi_node_acl,
+                               se_node_acl);
+
+       cmdsn_depth = ISCSI_TPG_ATTRIB(tpg)->default_cmdsn_depth;
+       /*
+        * se_nacl_new may be released by core_tpg_add_initiator_node_acl()
+        * when converting a NodeACL from demo mode -> explicit
+        */
+       se_nacl = core_tpg_add_initiator_node_acl(se_tpg, se_nacl_new,
+                               name, cmdsn_depth);
+       if (IS_ERR(se_nacl))
+               return se_nacl;
+
+       stats_cg = &acl->se_node_acl.acl_fabric_stat_group;
+       /*
+        * default_groups is an array of pointers (one stats group plus a
+        * NULL terminator), so allocate pointer slots, not full structs.
+        */
+       stats_cg->default_groups = kzalloc(sizeof(struct config_group *) * 2,
+                               GFP_KERNEL);
+       if (!stats_cg->default_groups) {
+               pr_err("Unable to allocate memory for"
+                               " stats_cg->default_groups\n");
+               core_tpg_del_initiator_node_acl(se_tpg, se_nacl, 1);
+               kfree(acl);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       stats_cg->default_groups[0] = &NODE_STAT_GRPS(acl)->iscsi_sess_stats_group;
+       stats_cg->default_groups[1] = NULL;
+       config_group_init_type_name(&NODE_STAT_GRPS(acl)->iscsi_sess_stats_group,
+                       "iscsi_sess_stats", &iscsi_stat_sess_cit);
+
+       return se_nacl;
+}
+
+/*
+ * configfs rmdir: tear down an initiator NodeACL.  Drops each stats
+ * default-group reference (clearing the slot first), frees the pointer
+ * array, then deletes the ACL from the target core and frees it.
+ */
+static void lio_target_drop_nodeacl(
+       struct se_node_acl *se_nacl)
+{
+       struct se_portal_group *se_tpg = se_nacl->se_tpg;
+       struct iscsi_node_acl *acl = container_of(se_nacl,
+                       struct iscsi_node_acl, se_node_acl);
+       struct config_item *df_item;
+       struct config_group *stats_cg;
+       int i;
+
+       stats_cg = &acl->se_node_acl.acl_fabric_stat_group;
+       for (i = 0; stats_cg->default_groups[i]; i++) {
+               df_item = &stats_cg->default_groups[i]->cg_item;
+               stats_cg->default_groups[i] = NULL;
+               config_item_put(df_item);
+       }
+       kfree(stats_cg->default_groups);
+
+       core_tpg_del_initiator_node_acl(se_tpg, se_nacl, 1);
+       kfree(acl);
+}
+
+/* End items for lio_target_acl_cit */
+
+/* Start items for lio_target_tpg_attrib_cit */
+
+/*
+ * DEF_TPG_ATTRIB(name) generates the configfs show/store handlers for a
+ * single iscsi_portal_group attribute.  show prints the current value of
+ * ISCSI_TPG_ATTRIB(tpg)->name; store parses an unsigned integer and
+ * applies it via iscsit_ta_<name>().  Both bracket the access with
+ * iscsit_get_tpg()/iscsit_put_tpg() so the TPG cannot go away underneath.
+ */
+#define DEF_TPG_ATTRIB(name)                                           \
+                                                                       \
+static ssize_t iscsi_tpg_attrib_show_##name(                           \
+       struct se_portal_group *se_tpg,                         \
+       char *page)                                                     \
+{                                                                      \
+       struct iscsi_portal_group *tpg = container_of(se_tpg,           \
+                       struct iscsi_portal_group, tpg_se_tpg); \
+       ssize_t rb;                                                     \
+                                                                       \
+       if (iscsit_get_tpg(tpg) < 0)                                    \
+               return -EINVAL;                                         \
+                                                                       \
+       rb = sprintf(page, "%u\n", ISCSI_TPG_ATTRIB(tpg)->name);        \
+       iscsit_put_tpg(tpg);                                            \
+       return rb;                                                      \
+}                                                                      \
+                                                                       \
+static ssize_t iscsi_tpg_attrib_store_##name(                          \
+       struct se_portal_group *se_tpg,                         \
+       const char *page,                                               \
+       size_t count)                                                   \
+{                                                                      \
+       struct iscsi_portal_group *tpg = container_of(se_tpg,           \
+                       struct iscsi_portal_group, tpg_se_tpg); \
+       char *endptr;                                                   \
+       u32 val;                                                        \
+       int ret;                                                        \
+                                                                       \
+       if (iscsit_get_tpg(tpg) < 0)                                    \
+               return -EINVAL;                                         \
+                                                                       \
+       val = simple_strtoul(page, &endptr, 0);                         \
+       ret = iscsit_ta_##name(tpg, val);                               \
+       if (ret < 0)                                                    \
+               goto out;                                               \
+                                                                       \
+       iscsit_put_tpg(tpg);                                            \
+       return count;                                                   \
+out:                                                                   \
+       iscsit_put_tpg(tpg);                                            \
+       return ret;                                                     \
+}
+
+/* Declare the configfs_attribute for a DEF_TPG_ATTRIB-generated pair. */
+#define TPG_ATTR(_name, _mode) TF_TPG_ATTRIB_ATTR(iscsi, _name, _mode);
+
+/*
+ * Define iscsi_tpg_attrib_s_authentication
+ */
+DEF_TPG_ATTRIB(authentication);
+TPG_ATTR(authentication, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_tpg_attrib_s_login_timeout
+ */
+DEF_TPG_ATTRIB(login_timeout);
+TPG_ATTR(login_timeout, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_tpg_attrib_s_netif_timeout
+ */
+DEF_TPG_ATTRIB(netif_timeout);
+TPG_ATTR(netif_timeout, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_tpg_attrib_s_generate_node_acls
+ */
+DEF_TPG_ATTRIB(generate_node_acls);
+TPG_ATTR(generate_node_acls, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_tpg_attrib_s_default_cmdsn_depth
+ */
+DEF_TPG_ATTRIB(default_cmdsn_depth);
+TPG_ATTR(default_cmdsn_depth, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_tpg_attrib_s_cache_dynamic_acls
+ */
+DEF_TPG_ATTRIB(cache_dynamic_acls);
+TPG_ATTR(cache_dynamic_acls, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_tpg_attrib_s_demo_mode_write_protect
+ */
+DEF_TPG_ATTRIB(demo_mode_write_protect);
+TPG_ATTR(demo_mode_write_protect, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_tpg_attrib_s_prod_mode_write_protect
+ */
+DEF_TPG_ATTRIB(prod_mode_write_protect);
+TPG_ATTR(prod_mode_write_protect, S_IRUGO | S_IWUSR);
+
+/* NULL-terminated table of TPG "attrib" configfs attributes. */
+static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = {
+       &iscsi_tpg_attrib_authentication.attr,
+       &iscsi_tpg_attrib_login_timeout.attr,
+       &iscsi_tpg_attrib_netif_timeout.attr,
+       &iscsi_tpg_attrib_generate_node_acls.attr,
+       &iscsi_tpg_attrib_default_cmdsn_depth.attr,
+       &iscsi_tpg_attrib_cache_dynamic_acls.attr,
+       &iscsi_tpg_attrib_demo_mode_write_protect.attr,
+       &iscsi_tpg_attrib_prod_mode_write_protect.attr,
+       NULL,
+};
+
+/* End items for lio_target_tpg_attrib_cit */
+
+/* Start items for lio_target_tpg_param_cit */
+
+/*
+ * DEF_TPG_PARAM(name) generates configfs show/store handlers for one
+ * iSCSI login parameter (e.g. AuthMethod) held in tpg->param_list.
+ * show looks the key up with iscsi_find_param_from_key(); store formats
+ * "name=value" and hands it to iscsi_change_param_value().
+ *
+ * NOTE(review): the store path unconditionally strips the final byte of
+ * the formatted buffer to kill a trailing newline — input that arrives
+ * without a newline loses its last character; confirm configfs callers
+ * always append one.  On iscsi_change_param_value() failure the handler
+ * returns -EINVAL rather than propagating ret.
+ */
+#define DEF_TPG_PARAM(name)                                            \
+static ssize_t iscsi_tpg_param_show_##name(                            \
+       struct se_portal_group *se_tpg,                                 \
+       char *page)                                                     \
+{                                                                      \
+       struct iscsi_portal_group *tpg = container_of(se_tpg,           \
+                       struct iscsi_portal_group, tpg_se_tpg);         \
+       struct iscsi_param *param;                                      \
+       ssize_t rb;                                                     \
+                                                                       \
+       if (iscsit_get_tpg(tpg) < 0)                                    \
+               return -EINVAL;                                         \
+                                                                       \
+       param = iscsi_find_param_from_key(__stringify(name),            \
+                               tpg->param_list);                       \
+       if (!param) {                                                   \
+               iscsit_put_tpg(tpg);                                    \
+               return -EINVAL;                                         \
+       }                                                               \
+       rb = snprintf(page, PAGE_SIZE, "%s\n", param->value);           \
+                                                                       \
+       iscsit_put_tpg(tpg);                                            \
+       return rb;                                                      \
+}                                                                      \
+static ssize_t iscsi_tpg_param_store_##name(                           \
+       struct se_portal_group *se_tpg,                         \
+       const char *page,                                               \
+       size_t count)                                                   \
+{                                                                      \
+       struct iscsi_portal_group *tpg = container_of(se_tpg,           \
+                       struct iscsi_portal_group, tpg_se_tpg);         \
+       char *buf;                                                      \
+       int ret;                                                        \
+                                                                       \
+       buf = kzalloc(PAGE_SIZE, GFP_KERNEL);                           \
+       if (!buf)                                                       \
+               return -ENOMEM;                                         \
+       snprintf(buf, PAGE_SIZE, "%s=%s", __stringify(name), page);     \
+       buf[strlen(buf)-1] = '\0'; /* Kill newline */                   \
+                                                                       \
+       if (iscsit_get_tpg(tpg) < 0) {                                  \
+               kfree(buf);                                             \
+               return -EINVAL;                                         \
+       }                                                               \
+                                                                       \
+       ret = iscsi_change_param_value(buf, tpg->param_list, 1);        \
+       if (ret < 0)                                                    \
+               goto out;                                               \
+                                                                       \
+       kfree(buf);                                                     \
+       iscsit_put_tpg(tpg);                                            \
+       return count;                                                   \
+out:                                                                   \
+       kfree(buf);                                                     \
+       iscsit_put_tpg(tpg);                                            \
+       return -EINVAL;                                         \
+}
+
+/* Declare the configfs_attribute for a DEF_TPG_PARAM-generated pair. */
+#define TPG_PARAM_ATTR(_name, _mode) TF_TPG_PARAM_ATTR(iscsi, _name, _mode);
+
+DEF_TPG_PARAM(AuthMethod);
+TPG_PARAM_ATTR(AuthMethod, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(HeaderDigest);
+TPG_PARAM_ATTR(HeaderDigest, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(DataDigest);
+TPG_PARAM_ATTR(DataDigest, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(MaxConnections);
+TPG_PARAM_ATTR(MaxConnections, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(TargetAlias);
+TPG_PARAM_ATTR(TargetAlias, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(InitialR2T);
+TPG_PARAM_ATTR(InitialR2T, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(ImmediateData);
+TPG_PARAM_ATTR(ImmediateData, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(MaxRecvDataSegmentLength);
+TPG_PARAM_ATTR(MaxRecvDataSegmentLength, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(MaxBurstLength);
+TPG_PARAM_ATTR(MaxBurstLength, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(FirstBurstLength);
+TPG_PARAM_ATTR(FirstBurstLength, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(DefaultTime2Wait);
+TPG_PARAM_ATTR(DefaultTime2Wait, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(DefaultTime2Retain);
+TPG_PARAM_ATTR(DefaultTime2Retain, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(MaxOutstandingR2T);
+TPG_PARAM_ATTR(MaxOutstandingR2T, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(DataPDUInOrder);
+TPG_PARAM_ATTR(DataPDUInOrder, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(DataSequenceInOrder);
+TPG_PARAM_ATTR(DataSequenceInOrder, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(ErrorRecoveryLevel);
+TPG_PARAM_ATTR(ErrorRecoveryLevel, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(IFMarker);
+TPG_PARAM_ATTR(IFMarker, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(OFMarker);
+TPG_PARAM_ATTR(OFMarker, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(IFMarkInt);
+TPG_PARAM_ATTR(IFMarkInt, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(OFMarkInt);
+TPG_PARAM_ATTR(OFMarkInt, S_IRUGO | S_IWUSR);
+
+/* NULL-terminated table of per-TPG iSCSI login-parameter attributes. */
+static struct configfs_attribute *lio_target_tpg_param_attrs[] = {
+       &iscsi_tpg_param_AuthMethod.attr,
+       &iscsi_tpg_param_HeaderDigest.attr,
+       &iscsi_tpg_param_DataDigest.attr,
+       &iscsi_tpg_param_MaxConnections.attr,
+       &iscsi_tpg_param_TargetAlias.attr,
+       &iscsi_tpg_param_InitialR2T.attr,
+       &iscsi_tpg_param_ImmediateData.attr,
+       &iscsi_tpg_param_MaxRecvDataSegmentLength.attr,
+       &iscsi_tpg_param_MaxBurstLength.attr,
+       &iscsi_tpg_param_FirstBurstLength.attr,
+       &iscsi_tpg_param_DefaultTime2Wait.attr,
+       &iscsi_tpg_param_DefaultTime2Retain.attr,
+       &iscsi_tpg_param_MaxOutstandingR2T.attr,
+       &iscsi_tpg_param_DataPDUInOrder.attr,
+       &iscsi_tpg_param_DataSequenceInOrder.attr,
+       &iscsi_tpg_param_ErrorRecoveryLevel.attr,
+       &iscsi_tpg_param_IFMarker.attr,
+       &iscsi_tpg_param_OFMarker.attr,
+       &iscsi_tpg_param_IFMarkInt.attr,
+       &iscsi_tpg_param_OFMarkInt.attr,
+       NULL,
+};
+
+/* End items for lio_target_tpg_param_cit */
+
+/* Start items for lio_target_tpg_cit */
+
+/*
+ * configfs show: print 1 if the TPG is active, 0 otherwise.  The state
+ * is read and formatted under tpg_state_lock.
+ */
+static ssize_t lio_target_tpg_show_enable(
+       struct se_portal_group *se_tpg,
+       char *page)
+{
+       struct iscsi_portal_group *tpg = container_of(se_tpg,
+                       struct iscsi_portal_group, tpg_se_tpg);
+       int enabled;
+       ssize_t len;
+
+       spin_lock(&tpg->tpg_state_lock);
+       enabled = (tpg->tpg_state == TPG_STATE_ACTIVE) ? 1 : 0;
+       len = sprintf(page, "%d\n", enabled);
+       spin_unlock(&tpg->tpg_state_lock);
+
+       return len;
+}
+
+/*
+ * configfs store: enable (1) or disable (0) an iSCSI target portal
+ * group.  Any other value is rejected.  Returns count on success or a
+ * negative errno.
+ */
+static ssize_t lio_target_tpg_store_enable(
+       struct se_portal_group *se_tpg,
+       const char *page,
+       size_t count)
+{
+       struct iscsi_portal_group *tpg = container_of(se_tpg,
+                       struct iscsi_portal_group, tpg_se_tpg);
+       char *endptr;
+       u32 op;
+       int ret = 0;
+
+       op = simple_strtoul(page, &endptr, 0);
+       if ((op != 1) && (op != 0)) {
+               pr_err("Illegal value for tpg_enable: %u\n", op);
+               return -EINVAL;
+       }
+
+       ret = iscsit_get_tpg(tpg);
+       if (ret < 0)
+               return -EINVAL;
+
+       if (op) {
+               ret = iscsit_tpg_enable_portal_group(tpg);
+               if (ret < 0)
+                       goto out;
+       } else {
+               /*
+                * iscsit_tpg_disable_portal_group() assumes force=1
+                */
+               ret = iscsit_tpg_disable_portal_group(tpg, 1);
+               if (ret < 0)
+                       goto out;
+       }
+
+       iscsit_put_tpg(tpg);
+       return count;
+out:
+       iscsit_put_tpg(tpg);
+       /*
+        * Propagate the enable/disable error instead of collapsing every
+        * failure to -EINVAL; ret is known negative on this path.
+        */
+       return ret;
+}
+
+TF_TPG_BASE_ATTR(lio_target, enable, S_IRUGO | S_IWUSR);
+
+/* NULL-terminated table of base TPG configfs attributes. */
+static struct configfs_attribute *lio_target_tpg_attrs[] = {
+       &lio_target_tpg_enable.attr,
+       NULL,
+};
+
+/* End items for lio_target_tpg_cit */
+
+/* Start items for lio_target_tiqn_cit */
+
+/*
+ * configfs mkdir: create a "tpgt_#" group below a target IQN, allocate
+ * the portal group, and register it with the target core.  Returns the
+ * new se_portal_group or NULL on failure.
+ */
+struct se_portal_group *lio_target_tiqn_addtpg(
+       struct se_wwn *wwn,
+       struct config_group *group,
+       const char *name)
+{
+       struct iscsi_portal_group *tpg;
+       struct iscsi_tiqn *tiqn;
+       char *tpgt_str, *end_ptr;
+       int ret = 0;
+       unsigned short int tpgt;
+
+       tiqn = container_of(wwn, struct iscsi_tiqn, tiqn_wwn);
+       /*
+        * Only tpgt_# directory groups can be created below
+        * target/iscsi/iqn.superturodiskarry/
+        */
+       tpgt_str = strstr(name, "tpgt_");
+       if (!tpgt_str) {
+               pr_err("Unable to locate \"tpgt_#\" directory"
+                               " group\n");
+               return NULL;
+       }
+       tpgt_str += 5; /* Skip ahead of "tpgt_" */
+       tpgt = (unsigned short int) simple_strtoul(tpgt_str, &end_ptr, 0);
+
+       tpg = iscsit_alloc_portal_group(tiqn, tpgt);
+       if (!tpg)
+               return NULL;
+
+       ret = core_tpg_register(
+                       &lio_target_fabric_configfs->tf_ops,
+                       wwn, &tpg->tpg_se_tpg, (void *)tpg,
+                       TRANSPORT_TPG_TYPE_NORMAL);
+       if (ret < 0) {
+               /*
+                * Don't leak the freshly allocated TPG on registration
+                * failure; mirror the cleanup done on the out: path.
+                */
+               kfree(tpg);
+               return NULL;
+       }
+
+       ret = iscsit_tpg_add_portal_group(tiqn, tpg);
+       if (ret != 0)
+               goto out;
+
+       pr_debug("LIO_Target_ConfigFS: REGISTER -> %s\n", tiqn->tiqn);
+       pr_debug("LIO_Target_ConfigFS: REGISTER -> Allocated TPG: %s\n",
+                       name);
+       return &tpg->tpg_se_tpg;
+out:
+       core_tpg_deregister(&tpg->tpg_se_tpg);
+       kfree(tpg);
+       return NULL;
+}
+
+/*
+ * configfs rmdir: remove a portal group from its owning IQN and release
+ * it via iscsit_tpg_del_portal_group().
+ */
+void lio_target_tiqn_deltpg(struct se_portal_group *se_tpg)
+{
+       struct iscsi_portal_group *tpg;
+       struct iscsi_tiqn *tiqn;
+
+       tpg = container_of(se_tpg, struct iscsi_portal_group, tpg_se_tpg);
+       tiqn = tpg->tpg_tiqn;
+       /*
+        * iscsit_tpg_del_portal_group() assumes force=1
+        */
+       pr_debug("LIO_Target_ConfigFS: DEREGISTER -> Releasing TPG\n");
+       iscsit_tpg_del_portal_group(tiqn, tpg, 1);
+}
+
+/* End items for lio_target_tiqn_cit */
+
+/* Start LIO-Target TIQN struct contig_item lio_target_cit */
+
+/* configfs show: report the LIO target version string. */
+static ssize_t lio_target_wwn_show_attr_lio_version(
+       struct target_fabric_configfs *tf,
+       char *page)
+{
+       return sprintf(page, "RisingTide Systems Linux-iSCSI Target "ISCSIT_VERSION"\n");
+}
+
+TF_WWN_ATTR_RO(lio_target, lio_version);
+
+/* NULL-terminated table of WWN-level configfs attributes. */
+static struct configfs_attribute *lio_target_wwn_attrs[] = {
+       &lio_target_wwn_lio_version.attr,
+       NULL,
+};
+
+/*
+ * configfs mkdir: create a target IQN node and wire up the five per-WWN
+ * fabric statistics groups below it.  Returns the new se_wwn or an
+ * ERR_PTR on failure.
+ */
+struct se_wwn *lio_target_call_coreaddtiqn(
+       struct target_fabric_configfs *tf,
+       struct config_group *group,
+       const char *name)
+{
+       struct config_group *stats_cg;
+       struct iscsi_tiqn *tiqn;
+
+       tiqn = iscsit_add_tiqn((unsigned char *)name);
+       if (IS_ERR(tiqn))
+               return ERR_CAST(tiqn);
+       /*
+        * Setup struct iscsi_wwn_stat_grps for se_wwn->fabric_stat_group.
+        * default_groups is an array of pointers: five stats groups plus
+        * a NULL terminator, so allocate pointer slots, not full structs.
+        */
+       stats_cg = &tiqn->tiqn_wwn.fabric_stat_group;
+
+       stats_cg->default_groups = kzalloc(sizeof(struct config_group *) * 6,
+                               GFP_KERNEL);
+       if (!stats_cg->default_groups) {
+               pr_err("Unable to allocate memory for"
+                               " stats_cg->default_groups\n");
+               iscsit_del_tiqn(tiqn);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       stats_cg->default_groups[0] = &WWN_STAT_GRPS(tiqn)->iscsi_instance_group;
+       stats_cg->default_groups[1] = &WWN_STAT_GRPS(tiqn)->iscsi_sess_err_group;
+       stats_cg->default_groups[2] = &WWN_STAT_GRPS(tiqn)->iscsi_tgt_attr_group;
+       stats_cg->default_groups[3] = &WWN_STAT_GRPS(tiqn)->iscsi_login_stats_group;
+       stats_cg->default_groups[4] = &WWN_STAT_GRPS(tiqn)->iscsi_logout_stats_group;
+       stats_cg->default_groups[5] = NULL;
+       config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_instance_group,
+                       "iscsi_instance", &iscsi_stat_instance_cit);
+       config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_sess_err_group,
+                       "iscsi_sess_err", &iscsi_stat_sess_err_cit);
+       config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_tgt_attr_group,
+                       "iscsi_tgt_attr", &iscsi_stat_tgt_attr_cit);
+       config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_login_stats_group,
+                       "iscsi_login_stats", &iscsi_stat_login_cit);
+       config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_logout_stats_group,
+                       "iscsi_logout_stats", &iscsi_stat_logout_cit);
+
+       pr_debug("LIO_Target_ConfigFS: REGISTER -> %s\n", tiqn->tiqn);
+       pr_debug("LIO_Target_ConfigFS: REGISTER -> Allocated Node:"
+                       " %s\n", name);
+       return &tiqn->tiqn_wwn;
+}
+
+/*
+ * configfs rmdir: release the per-WWN stats default groups (clearing
+ * each slot before dropping its reference), free the pointer array, and
+ * delete the IQN.
+ */
+void lio_target_call_coredeltiqn(
+       struct se_wwn *wwn)
+{
+       struct iscsi_tiqn *tiqn = container_of(wwn, struct iscsi_tiqn, tiqn_wwn);
+       struct config_item *df_item;
+       struct config_group *stats_cg;
+       int i;
+
+       stats_cg = &tiqn->tiqn_wwn.fabric_stat_group;
+       for (i = 0; stats_cg->default_groups[i]; i++) {
+               df_item = &stats_cg->default_groups[i]->cg_item;
+               stats_cg->default_groups[i] = NULL;
+               config_item_put(df_item);
+       }
+       kfree(stats_cg->default_groups);
+
+       pr_debug("LIO_Target_ConfigFS: DEREGISTER -> %s\n",
+                       tiqn->tiqn);
+       iscsit_del_tiqn(tiqn);
+}
+
+/* End LIO-Target TIQN struct contig_lio_target_cit */
+
+/* Start lio_target_discovery_auth_cit */
+
+/*
+ * DEF_DISC_AUTH_STR(name, flags) generates show/store wrappers that
+ * forward a discovery-auth string attribute (userid, password, ...) to
+ * the shared __iscsi_disc_* helpers, always operating on the global
+ * discovery ACL.
+ */
+#define DEF_DISC_AUTH_STR(name, flags)                                 \
+       __DEF_NACL_AUTH_STR(disc, name, flags)                          \
+static ssize_t iscsi_disc_show_##name(                                 \
+       struct target_fabric_configfs *tf,                              \
+       char *page)                                                     \
+{                                                                      \
+       return __iscsi_disc_show_##name(&iscsit_global->discovery_acl,  \
+               page);                                                  \
+}                                                                      \
+static ssize_t iscsi_disc_store_##name(                                        \
+       struct target_fabric_configfs *tf,                              \
+       const char *page,                                               \
+       size_t count)                                                   \
+{                                                                      \
+       return __iscsi_disc_store_##name(&iscsit_global->discovery_acl, \
+               page, count);                                           \
+}
+
+/*
+ * DEF_DISC_AUTH_INT(name) generates a read-only wrapper for an integer
+ * discovery-auth attribute (no store handler is emitted).
+ */
+#define DEF_DISC_AUTH_INT(name)                                                \
+       __DEF_NACL_AUTH_INT(disc, name)                                 \
+static ssize_t iscsi_disc_show_##name(                                 \
+       struct target_fabric_configfs *tf,                              \
+       char *page)                                                     \
+{                                                                      \
+       return __iscsi_disc_show_##name(&iscsit_global->discovery_acl,  \
+                       page);                                          \
+}
+
+/* Attribute declarations for the generated discovery-auth handlers. */
+#define DISC_AUTH_ATTR(_name, _mode) TF_DISC_ATTR(iscsi, _name, _mode)
+#define DISC_AUTH_ATTR_RO(_name) TF_DISC_ATTR_RO(iscsi, _name)
+
+/*
+ * One-way authentication userid
+ */
+DEF_DISC_AUTH_STR(userid, NAF_USERID_SET);
+DISC_AUTH_ATTR(userid, S_IRUGO | S_IWUSR);
+/*
+ * One-way authentication password
+ */
+DEF_DISC_AUTH_STR(password, NAF_PASSWORD_SET);
+DISC_AUTH_ATTR(password, S_IRUGO | S_IWUSR);
+/*
+ * Enforce mutual authentication
+ */
+DEF_DISC_AUTH_INT(authenticate_target);
+DISC_AUTH_ATTR_RO(authenticate_target);
+/*
+ * Mutual authentication userid
+ */
+DEF_DISC_AUTH_STR(userid_mutual, NAF_USERID_IN_SET);
+DISC_AUTH_ATTR(userid_mutual, S_IRUGO | S_IWUSR);
+/*
+ * Mutual authentication password
+ */
+DEF_DISC_AUTH_STR(password_mutual, NAF_PASSWORD_IN_SET);
+DISC_AUTH_ATTR(password_mutual, S_IRUGO | S_IWUSR);
+
+/*
+ * enforce_discovery_auth
+ */
+/*
+ * configfs show: report the current enforce_discovery_auth flag of the
+ * global discovery ACL.
+ */
+static ssize_t iscsi_disc_show_enforce_discovery_auth(
+       struct target_fabric_configfs *tf,
+       char *page)
+{
+       return sprintf(page, "%d\n",
+               iscsit_global->discovery_acl.node_auth.enforce_discovery_auth);
+}
+
+/*
+ * configfs store: toggle mandatory authentication for the discovery
+ * TPG.  Accepts only 0 or 1.  Enabling forces the discovery AuthMethod
+ * key to CHAP and sets the TPG/ACL enforcement flags; disabling resets
+ * AuthMethod to "CHAP,None" and clears them.
+ */
+static ssize_t iscsi_disc_store_enforce_discovery_auth(
+       struct target_fabric_configfs *tf,
+       const char *page,
+       size_t count)
+{
+       struct iscsi_param *param;
+       struct iscsi_portal_group *discovery_tpg = iscsit_global->discovery_tpg;
+       char *endptr;
+       u32 op;
+
+       op = simple_strtoul(page, &endptr, 0);
+       if ((op != 1) && (op != 0)) {
+               pr_err("Illegal value for enforce_discovery_auth:"
+                               " %u\n", op);
+               return -EINVAL;
+       }
+
+       if (!discovery_tpg) {
+               pr_err("iscsit_global->discovery_tpg is NULL\n");
+               return -EINVAL;
+       }
+
+       param = iscsi_find_param_from_key(AUTHMETHOD,
+                               discovery_tpg->param_list);
+       if (!param)
+               return -EINVAL;
+
+       if (op) {
+               /*
+                * Reset the AuthMethod key to CHAP.
+                */
+               if (iscsi_update_param_value(param, CHAP) < 0)
+                       return -EINVAL;
+
+               discovery_tpg->tpg_attrib.authentication = 1;
+               iscsit_global->discovery_acl.node_auth.enforce_discovery_auth = 1;
+               pr_debug("LIO-CORE[0] Successfully enabled"
+                       " authentication enforcement for iSCSI"
+                       " Discovery TPG\n");
+       } else {
+               /*
+                * Reset the AuthMethod key to CHAP,None
+                */
+               if (iscsi_update_param_value(param, "CHAP,None") < 0)
+                       return -EINVAL;
+
+               discovery_tpg->tpg_attrib.authentication = 0;
+               iscsit_global->discovery_acl.node_auth.enforce_discovery_auth = 0;
+               pr_debug("LIO-CORE[0] Successfully disabled"
+                       " authentication enforcement for iSCSI"
+                       " Discovery TPG\n");
+       }
+
+       return count;
+}
+
+DISC_AUTH_ATTR(enforce_discovery_auth, S_IRUGO | S_IWUSR);
+
+/* NULL-terminated table of discovery-auth configfs attributes. */
+static struct configfs_attribute *lio_target_discovery_auth_attrs[] = {
+       &iscsi_disc_userid.attr,
+       &iscsi_disc_password.attr,
+       &iscsi_disc_authenticate_target.attr,
+       &iscsi_disc_userid_mutual.attr,
+       &iscsi_disc_password_mutual.attr,
+       &iscsi_disc_enforce_discovery_auth.attr,
+       NULL,
+};
+
+/* End lio_target_discovery_auth_cit */
+
+/* Start functions for target_core_fabric_ops */
+
+/* target_core_fabric_ops: fabric name reported to the target core. */
+static char *iscsi_get_fabric_name(void)
+{
+       return "iSCSI";
+}
+
+/* Return the Initiator Task Tag of the iSCSI command backing se_cmd. */
+static u32 iscsi_get_task_tag(struct se_cmd *se_cmd)
+{
+       struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+       return cmd->init_task_tag;
+}
+
+/* Expose the iSCSI-private command state to the target core. */
+static int iscsi_get_cmd_state(struct se_cmd *se_cmd)
+{
+       struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+       return cmd->i_state;
+}
+
+/* True when the command has reached ISTATE_REMOVE. */
+static int iscsi_is_state_remove(struct se_cmd *se_cmd)
+{
+       struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+       return (cmd->i_state == ISTATE_REMOVE);
+}
+
+/*
+ * NOTE(review): despite the name, this returns non-zero when the session
+ * is NOT in TARG_SESS_STATE_LOGGED_IN — confirm the target core expects
+ * these inverted semantics from this callback.
+ */
+static int lio_sess_logged_in(struct se_session *se_sess)
+{
+       struct iscsi_session *sess = se_sess->fabric_sess_ptr;
+       int ret;
+       /*
+        * Called with spin_lock_bh(&tpg_lock); and
+        * spin_lock(&se_tpg->session_lock); held.
+        */
+       spin_lock(&sess->conn_lock);
+       ret = (sess->session_state != TARG_SESS_STATE_LOGGED_IN);
+       spin_unlock(&sess->conn_lock);
+
+       return ret;
+}
+
+/* Return the session's unique index for the target core. */
+static u32 lio_sess_get_index(struct se_session *se_sess)
+{
+       struct iscsi_session *sess = se_sess->fabric_sess_ptr;
+
+       return sess->session_index;
+}
+
+/* Format the 6-byte ISID into buf as 12 hex characters; returns length. */
+static u32 lio_sess_get_initiator_sid(
+       struct se_session *se_sess,
+       unsigned char *buf,
+       u32 size)
+{
+       struct iscsi_session *sess = se_sess->fabric_sess_ptr;
+       /*
+        * iSCSI Initiator Session Identifier from RFC-3720.
+        */
+       return snprintf(buf, size, "%02x%02x%02x%02x%02x%02x",
+               sess->isid[0], sess->isid[1], sess->isid[2],
+               sess->isid[3], sess->isid[4], sess->isid[5]);
+}
+
+/* Queue a Data-In response for the command on its connection. */
+static int lio_queue_data_in(struct se_cmd *se_cmd)
+{
+       struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+       cmd->i_state = ISTATE_SEND_DATAIN;
+       iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state);
+       return 0;
+}
+
+/*
+ * Request write data from the initiator: build R2Ts unless the data is
+ * arriving as immediate or unsolicited data-out.
+ */
+static int lio_write_pending(struct se_cmd *se_cmd)
+{
+       struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+       if (!cmd->immediate_data && !cmd->unsolicited_data)
+               return iscsit_build_r2ts_for_cmd(cmd, cmd->conn, 1);
+
+       return 0;
+}
+
+/* Non-zero while the final Data-Out PDU has not yet been received. */
+static int lio_write_pending_status(struct se_cmd *se_cmd)
+{
+       struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+       int ret;
+
+       spin_lock_bh(&cmd->istate_lock);
+       ret = !(cmd->cmd_flags & ICF_GOT_LAST_DATAOUT);
+       spin_unlock_bh(&cmd->istate_lock);
+
+       return ret;
+}
+
+/* Queue a SCSI status response for the command on its connection. */
+static int lio_queue_status(struct se_cmd *se_cmd)
+{
+       struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+       cmd->i_state = ISTATE_SEND_STATUS;
+       iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state);
+       return 0;
+}
+
+/*
+ * Prepend the RFC-3720 16-bit big-endian SenseLength header to the
+ * sense buffer and return the 2-byte data offset.
+ */
+static u16 lio_set_fabric_sense_len(struct se_cmd *se_cmd, u32 sense_length)
+{
+       unsigned char *buffer = se_cmd->sense_buffer;
+       /*
+        * From RFC-3720 10.4.7.  Data Segment - Sense and Response Data Segment
+        * 16-bit SenseLength.
+        */
+       buffer[0] = ((sense_length >> 8) & 0xff);
+       buffer[1] = (sense_length & 0xff);
+       /*
+        * Return two byte offset into allocated sense_buffer.
+        */
+       return 2;
+}
+
+/* Size of the fabric-specific sense header written above. */
+static u16 lio_get_fabric_sense_len(void)
+{
+       /*
+        * Return two byte offset into allocated sense_buffer.
+        */
+       return 2;
+}
+
+static int lio_queue_tm_rsp(struct se_cmd *se_cmd)
+{
+       struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+       cmd->i_state = ISTATE_SEND_TASKMGTRSP;
+       iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state);
+       return 0;
+}
+
+/*
+ * TCM fabric callback: return the endpoint WWN, i.e. the TargetName
+ * IQN string owned by this TPG's parent tiqn.
+ */
+static char *lio_tpg_get_endpoint_wwn(struct se_portal_group *se_tpg)
+{
+       struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+
+       return &tpg->tpg_tiqn->tiqn[0];
+}
+
+/* TCM fabric callback: return the Target Portal Group Tag. */
+static u16 lio_tpg_get_tag(struct se_portal_group *se_tpg)
+{
+       struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+
+       return tpg->tpgt;
+}
+
+/* TCM fabric callback: return the TPG's default CmdSN queue depth. */
+static u32 lio_tpg_get_default_depth(struct se_portal_group *se_tpg)
+{
+       struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+
+       return ISCSI_TPG_ATTRIB(tpg)->default_cmdsn_depth;
+}
+
+/*
+ * TCM fabric callback: non-zero when this TPG runs in demo mode,
+ * i.e. node ACLs are generated dynamically for any initiator.
+ */
+static int lio_tpg_check_demo_mode(struct se_portal_group *se_tpg)
+{
+       struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+
+       return ISCSI_TPG_ATTRIB(tpg)->generate_node_acls;
+}
+
+/* TCM fabric callback: non-zero when dynamically generated ACLs are cached. */
+static int lio_tpg_check_demo_mode_cache(struct se_portal_group *se_tpg)
+{
+       struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+
+       return ISCSI_TPG_ATTRIB(tpg)->cache_dynamic_acls;
+}
+
+/* TCM fabric callback: demo-mode sessions see LUNs read-only when set. */
+static int lio_tpg_check_demo_mode_write_protect(
+       struct se_portal_group *se_tpg)
+{
+       struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+
+       return ISCSI_TPG_ATTRIB(tpg)->demo_mode_write_protect;
+}
+
+/* TCM fabric callback: explicit-ACL sessions see LUNs read-only when set. */
+static int lio_tpg_check_prod_mode_write_protect(
+       struct se_portal_group *se_tpg)
+{
+       struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+
+       return ISCSI_TPG_ATTRIB(tpg)->prod_mode_write_protect;
+}
+
+/*
+ * TCM fabric callback: free a fabric ACL previously allocated for this
+ * TPG.  The iscsi_node_acl embeds the generic se_node_acl, so recover
+ * the containing structure and kfree() it.
+ */
+static void lio_tpg_release_fabric_acl(
+       struct se_portal_group *se_tpg,
+       struct se_node_acl *se_acl)
+{
+       struct iscsi_node_acl *acl = container_of(se_acl,
+                               struct iscsi_node_acl, se_node_acl);
+       kfree(acl);
+}
+
+/*
+ * Called with spin_lock_bh(struct se_portal_group->session_lock) held.
+ *
+ * Also, this function calls iscsit_inc_session_usage_count() on the
+ * struct iscsi_session in question.
+ *
+ * Returns 1 when this call initiated session reinstatement (the caller
+ * must later drop the usage count via lio_tpg_close_session()), or 0
+ * when the session is already logging out, falling back to ERL0, or
+ * its Time2Retain timer has expired.
+ */
+static int lio_tpg_shutdown_session(struct se_session *se_sess)
+{
+       struct iscsi_session *sess = se_sess->fabric_sess_ptr;
+
+       spin_lock(&sess->conn_lock);
+       if (atomic_read(&sess->session_fall_back_to_erl0) ||
+           atomic_read(&sess->session_logout) ||
+           (sess->time2retain_timer_flags & ISCSI_TF_EXPIRED)) {
+               /* Teardown already in progress elsewhere; nothing to do. */
+               spin_unlock(&sess->conn_lock);
+               return 0;
+       }
+       atomic_set(&sess->session_reinstatement, 1);
+       spin_unlock(&sess->conn_lock);
+
+       iscsit_inc_session_usage_count(sess);
+       iscsit_stop_time2retain_timer(sess);
+
+       return 1;
+}
+
+/*
+ * Calls iscsit_dec_session_usage_count() as inverse of
+ * lio_tpg_shutdown_session()
+ */
+static void lio_tpg_close_session(struct se_session *se_sess)
+{
+       struct iscsi_session *sess = se_sess->fabric_sess_ptr;
+       /*
+        * If the iSCSI Session for the iSCSI Initiator Node exists,
+        * forcefully shutdown the iSCSI NEXUS.
+        */
+       iscsit_stop_session(sess, 1, 1);
+       iscsit_dec_session_usage_count(sess);
+       iscsit_close_session(sess);
+}
+
+/*
+ * TCM fabric callback: stop the session's connections; the sleep flags
+ * are passed straight through to iscsit_stop_session().
+ */
+static void lio_tpg_stop_session(
+       struct se_session *se_sess,
+       int sess_sleep,
+       int conn_sleep)
+{
+       struct iscsi_session *sess = se_sess->fabric_sess_ptr;
+
+       iscsit_stop_session(sess, sess_sleep, conn_sleep);
+}
+
+/* TCM fabric callback: force the session back to ErrorRecoveryLevel=0. */
+static void lio_tpg_fall_back_to_erl0(struct se_session *se_sess)
+{
+       struct iscsi_session *sess = se_sess->fabric_sess_ptr;
+
+       iscsit_fall_back_to_erl0(sess);
+}
+
+/* TCM fabric callback: return the parent tiqn's instance index. */
+static u32 lio_tpg_get_inst_index(struct se_portal_group *se_tpg)
+{
+       struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+
+       return tpg->tpg_tiqn->tiqn_index;
+}
+
+/*
+ * TCM fabric callback: initialize the iSCSI-specific node attributes
+ * of a newly created node ACL to their defaults.
+ * (iscsit_set_default_node_attribues() carries a historical typo in
+ * its name; it is the real exported symbol.)
+ */
+static void lio_set_default_node_attributes(struct se_node_acl *se_acl)
+{
+       struct iscsi_node_acl *acl = container_of(se_acl, struct iscsi_node_acl,
+                               se_node_acl);
+
+       ISCSI_NODE_ATTRIB(acl)->nacl = acl;
+       iscsit_set_default_node_attribues(acl);
+}
+
+/*
+ * TCM fabric callback: release the fabric's command descriptor once
+ * target_core_mod is done with the embedded se_cmd.
+ */
+static void lio_release_cmd(struct se_cmd *se_cmd)
+{
+       struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+       iscsit_release_cmd(cmd);
+}
+
+/* End functions for target_core_fabric_ops */
+
+/*
+ * Allocate and register the "iscsi" fabric with target_core_mod:
+ * wire up every target_core_fabric_ops function pointer and the
+ * default configfs attribute tables, then register the fabric.
+ * On success the handle is published in lio_target_fabric_configfs.
+ *
+ * Returns 0 on success or a negative errno from
+ * target_fabric_configfs_init()/_register().
+ */
+int iscsi_target_register_configfs(void)
+{
+       struct target_fabric_configfs *fabric;
+       int ret;
+
+       lio_target_fabric_configfs = NULL;
+       fabric = target_fabric_configfs_init(THIS_MODULE, "iscsi");
+       if (IS_ERR(fabric)) {
+               pr_err("target_fabric_configfs_init() for"
+                               " LIO-Target failed!\n");
+               return PTR_ERR(fabric);
+       }
+       /*
+        * Setup the fabric API of function pointers used by target_core_mod..
+        */
+       fabric->tf_ops.get_fabric_name = &iscsi_get_fabric_name;
+       fabric->tf_ops.get_fabric_proto_ident = &iscsi_get_fabric_proto_ident;
+       fabric->tf_ops.tpg_get_wwn = &lio_tpg_get_endpoint_wwn;
+       fabric->tf_ops.tpg_get_tag = &lio_tpg_get_tag;
+       fabric->tf_ops.tpg_get_default_depth = &lio_tpg_get_default_depth;
+       fabric->tf_ops.tpg_get_pr_transport_id = &iscsi_get_pr_transport_id;
+       fabric->tf_ops.tpg_get_pr_transport_id_len =
+                               &iscsi_get_pr_transport_id_len;
+       fabric->tf_ops.tpg_parse_pr_out_transport_id =
+                               &iscsi_parse_pr_out_transport_id;
+       fabric->tf_ops.tpg_check_demo_mode = &lio_tpg_check_demo_mode;
+       fabric->tf_ops.tpg_check_demo_mode_cache =
+                               &lio_tpg_check_demo_mode_cache;
+       fabric->tf_ops.tpg_check_demo_mode_write_protect =
+                               &lio_tpg_check_demo_mode_write_protect;
+       fabric->tf_ops.tpg_check_prod_mode_write_protect =
+                               &lio_tpg_check_prod_mode_write_protect;
+       fabric->tf_ops.tpg_alloc_fabric_acl = &lio_tpg_alloc_fabric_acl;
+       fabric->tf_ops.tpg_release_fabric_acl = &lio_tpg_release_fabric_acl;
+       fabric->tf_ops.tpg_get_inst_index = &lio_tpg_get_inst_index;
+       fabric->tf_ops.release_cmd = &lio_release_cmd;
+       fabric->tf_ops.shutdown_session = &lio_tpg_shutdown_session;
+       fabric->tf_ops.close_session = &lio_tpg_close_session;
+       fabric->tf_ops.stop_session = &lio_tpg_stop_session;
+       fabric->tf_ops.fall_back_to_erl0 = &lio_tpg_fall_back_to_erl0;
+       fabric->tf_ops.sess_logged_in = &lio_sess_logged_in;
+       fabric->tf_ops.sess_get_index = &lio_sess_get_index;
+       fabric->tf_ops.sess_get_initiator_sid = &lio_sess_get_initiator_sid;
+       fabric->tf_ops.write_pending = &lio_write_pending;
+       fabric->tf_ops.write_pending_status = &lio_write_pending_status;
+       fabric->tf_ops.set_default_node_attributes =
+                               &lio_set_default_node_attributes;
+       fabric->tf_ops.get_task_tag = &iscsi_get_task_tag;
+       fabric->tf_ops.get_cmd_state = &iscsi_get_cmd_state;
+       fabric->tf_ops.queue_data_in = &lio_queue_data_in;
+       fabric->tf_ops.queue_status = &lio_queue_status;
+       fabric->tf_ops.queue_tm_rsp = &lio_queue_tm_rsp;
+       fabric->tf_ops.set_fabric_sense_len = &lio_set_fabric_sense_len;
+       fabric->tf_ops.get_fabric_sense_len = &lio_get_fabric_sense_len;
+       fabric->tf_ops.is_state_remove = &iscsi_is_state_remove;
+       /*
+        * Setup function pointers for generic logic in target_core_fabric_configfs.c
+        */
+       fabric->tf_ops.fabric_make_wwn = &lio_target_call_coreaddtiqn;
+       fabric->tf_ops.fabric_drop_wwn = &lio_target_call_coredeltiqn;
+       fabric->tf_ops.fabric_make_tpg = &lio_target_tiqn_addtpg;
+       fabric->tf_ops.fabric_drop_tpg = &lio_target_tiqn_deltpg;
+       fabric->tf_ops.fabric_post_link = NULL;
+       fabric->tf_ops.fabric_pre_unlink = NULL;
+       fabric->tf_ops.fabric_make_np = &lio_target_call_addnptotpg;
+       fabric->tf_ops.fabric_drop_np = &lio_target_call_delnpfromtpg;
+       fabric->tf_ops.fabric_make_nodeacl = &lio_target_make_nodeacl;
+       fabric->tf_ops.fabric_drop_nodeacl = &lio_target_drop_nodeacl;
+       /*
+        * Setup default attribute lists for various fabric->tf_cit_tmpl
+        * struct config_item_type's
+        */
+       TF_CIT_TMPL(fabric)->tfc_discovery_cit.ct_attrs = lio_target_discovery_auth_attrs;
+       TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = lio_target_wwn_attrs;
+       TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = lio_target_tpg_attrs;
+       TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = lio_target_tpg_attrib_attrs;
+       TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = lio_target_tpg_param_attrs;
+       TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = lio_target_portal_attrs;
+       TF_CIT_TMPL(fabric)->tfc_tpg_nacl_base_cit.ct_attrs = lio_target_initiator_attrs;
+       TF_CIT_TMPL(fabric)->tfc_tpg_nacl_attrib_cit.ct_attrs = lio_target_nacl_attrib_attrs;
+       TF_CIT_TMPL(fabric)->tfc_tpg_nacl_auth_cit.ct_attrs = lio_target_nacl_auth_attrs;
+       TF_CIT_TMPL(fabric)->tfc_tpg_nacl_param_cit.ct_attrs = lio_target_nacl_param_attrs;
+
+       ret = target_fabric_configfs_register(fabric);
+       if (ret < 0) {
+               pr_err("target_fabric_configfs_register() for"
+                               " LIO-Target failed!\n");
+               /* Registration failed: release the fabric we allocated above. */
+               target_fabric_configfs_free(fabric);
+               return ret;
+       }
+
+       lio_target_fabric_configfs = fabric;
+       pr_debug("LIO_TARGET[0] - Set fabric ->"
+                       " lio_target_fabric_configfs\n");
+       return 0;
+}
+
+
+/*
+ * Inverse of iscsi_target_register_configfs(): disable the discovery
+ * TPG (if one exists) and deregister the fabric from target_core_mod.
+ * Safe to call when registration never happened (early return).
+ */
+void iscsi_target_deregister_configfs(void)
+{
+       if (!lio_target_fabric_configfs)
+               return;
+       /*
+        * Shutdown discovery sessions and disable discovery TPG
+        */
+       if (iscsit_global->discovery_tpg)
+               iscsit_tpg_disable_portal_group(iscsit_global->discovery_tpg, 1);
+
+       target_fabric_configfs_deregister(lio_target_fabric_configfs);
+       lio_target_fabric_configfs = NULL;
+       pr_debug("LIO_TARGET[0] - Cleared"
+                               " lio_target_fabric_configfs\n");
+}
diff --git a/drivers/target/iscsi/iscsi_target_configfs.h b/drivers/target/iscsi/iscsi_target_configfs.h
new file mode 100644 (file)
index 0000000..8cd5a63
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef ISCSI_TARGET_CONFIGFS_H
+#define ISCSI_TARGET_CONFIGFS_H
+
+/* Register/deregister the LIO "iscsi" fabric with target_core_mod. */
+extern int iscsi_target_register_configfs(void);
+extern void iscsi_target_deregister_configfs(void);
+
+#endif /* ISCSI_TARGET_CONFIGFS_H */
diff --git a/drivers/target/iscsi/iscsi_target_core.h b/drivers/target/iscsi/iscsi_target_core.h
new file mode 100644 (file)
index 0000000..470ed55
--- /dev/null
@@ -0,0 +1,859 @@
+#ifndef ISCSI_TARGET_CORE_H
+#define ISCSI_TARGET_CORE_H
+
+#include <linux/in.h>
+#include <linux/configfs.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+
+#define ISCSIT_VERSION                 "v4.1.0-rc1"
+#define ISCSI_MAX_DATASN_MISSING_COUNT 16
+#define ISCSI_TX_THREAD_TCP_TIMEOUT    2
+#define ISCSI_RX_THREAD_TCP_TIMEOUT    2
+#define SECONDS_FOR_ASYNC_LOGOUT       10
+#define SECONDS_FOR_ASYNC_TEXT         10
+#define SECONDS_FOR_LOGOUT_COMP                15
+#define WHITE_SPACE                    " \t\v\f\n\r"
+
+/*
+ * struct iscsi_node_attrib sanity values: defaults plus the min/max
+ * bounds enforced when an attribute is set via configfs.
+ * NOTE(review): NA_DATAOUT_TIMEOUT_MIX looks like a typo for _MIN,
+ * but the name is kept as-is since other files may reference it.
+ */
+#define NA_DATAOUT_TIMEOUT             3
+#define NA_DATAOUT_TIMEOUT_MAX         60
+#define NA_DATAOUT_TIMEOUT_MIX         2
+#define NA_DATAOUT_TIMEOUT_RETRIES     5
+#define NA_DATAOUT_TIMEOUT_RETRIES_MAX 15
+#define NA_DATAOUT_TIMEOUT_RETRIES_MIN 1
+#define NA_NOPIN_TIMEOUT               5
+#define NA_NOPIN_TIMEOUT_MAX           60
+#define NA_NOPIN_TIMEOUT_MIN           3
+#define NA_NOPIN_RESPONSE_TIMEOUT      5
+#define NA_NOPIN_RESPONSE_TIMEOUT_MAX  60
+#define NA_NOPIN_RESPONSE_TIMEOUT_MIN  3
+#define NA_RANDOM_DATAIN_PDU_OFFSETS   0
+#define NA_RANDOM_DATAIN_SEQ_OFFSETS   0
+#define NA_RANDOM_R2T_OFFSETS          0
+#define NA_DEFAULT_ERL                 0
+#define NA_DEFAULT_ERL_MAX             2
+#define NA_DEFAULT_ERL_MIN             0
+
+/* struct iscsi_tpg_attrib sanity values (defaults and min/max bounds) */
+#define TA_AUTHENTICATION              1
+#define TA_LOGIN_TIMEOUT               15
+#define TA_LOGIN_TIMEOUT_MAX           30
+#define TA_LOGIN_TIMEOUT_MIN           5
+#define TA_NETIF_TIMEOUT               2
+#define TA_NETIF_TIMEOUT_MAX           15
+#define TA_NETIF_TIMEOUT_MIN           2
+#define TA_GENERATE_NODE_ACLS          0
+#define TA_DEFAULT_CMDSN_DEPTH         16
+#define TA_DEFAULT_CMDSN_DEPTH_MAX     512
+#define TA_DEFAULT_CMDSN_DEPTH_MIN     1
+#define TA_CACHE_DYNAMIC_ACLS          0
+/* Enabled by default in demo mode (generic_node_acls=1) */
+#define TA_DEMO_MODE_WRITE_PROTECT     1
+/* Disabled by default in production mode w/ explicit ACLs */
+#define TA_PROD_MODE_WRITE_PROTECT     0
+#define TA_CACHE_CORE_NPS              0
+
+/* Network transport type of a target portal (tpg_np) */
+enum tpg_np_network_transport_table {
+       ISCSI_TCP                               = 0,
+       ISCSI_SCTP_TCP                          = 1,
+       ISCSI_SCTP_UDP                          = 2,
+       ISCSI_IWARP_TCP                         = 3,
+       ISCSI_IWARP_SCTP                        = 4,
+       ISCSI_INFINIBAND                        = 5,
+};
+
+/* RFC-3720 7.1.4  Standard Connection State Diagram for a Target */
+enum target_conn_state_table {
+       TARG_CONN_STATE_FREE                    = 0x1,
+       TARG_CONN_STATE_XPT_UP                  = 0x3,
+       TARG_CONN_STATE_IN_LOGIN                = 0x4,
+       TARG_CONN_STATE_LOGGED_IN               = 0x5,
+       TARG_CONN_STATE_IN_LOGOUT               = 0x6,
+       TARG_CONN_STATE_LOGOUT_REQUESTED        = 0x7,
+       TARG_CONN_STATE_CLEANUP_WAIT            = 0x8,
+};
+
+/* RFC-3720 7.3.2  Session State Diagram for a Target */
+enum target_sess_state_table {
+       TARG_SESS_STATE_FREE                    = 0x1,
+       TARG_SESS_STATE_ACTIVE                  = 0x2,
+       TARG_SESS_STATE_LOGGED_IN               = 0x3,
+       TARG_SESS_STATE_FAILED                  = 0x4,
+       TARG_SESS_STATE_IN_CONTINUE             = 0x5,
+};
+
+/* struct iscsi_data_count->type: direction of the data transfer */
+enum data_count_type {
+       ISCSI_RX_DATA   = 1,
+       ISCSI_TX_DATA   = 2,
+};
+
+/* struct iscsi_datain_req->dr_complete */
+enum datain_req_comp_table {
+       DATAIN_COMPLETE_NORMAL                  = 1,
+       DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY = 2,
+       DATAIN_COMPLETE_CONNECTION_RECOVERY     = 3,
+};
+
+/* struct iscsi_datain_req->recovery */
+enum datain_req_rec_table {
+       DATAIN_WITHIN_COMMAND_RECOVERY          = 1,
+       DATAIN_CONNECTION_RECOVERY              = 2,
+};
+
+/* struct iscsi_portal_group->state */
+enum tpg_state_table {
+       TPG_STATE_FREE                          = 0,
+       TPG_STATE_ACTIVE                        = 1,
+       TPG_STATE_INACTIVE                      = 2,
+       TPG_STATE_COLD_RESET                    = 3,
+};
+
+/* struct iscsi_tiqn->tiqn_state */
+enum tiqn_state_table {
+       TIQN_STATE_ACTIVE                       = 1,
+       TIQN_STATE_SHUTDOWN                     = 2,
+};
+
+/* struct iscsi_cmd->cmd_flags: bitmask, OR-able */
+enum cmd_flags_table {
+       ICF_GOT_LAST_DATAOUT                    = 0x00000001,
+       ICF_GOT_DATACK_SNACK                    = 0x00000002,
+       ICF_NON_IMMEDIATE_UNSOLICITED_DATA      = 0x00000004,
+       ICF_SENT_LAST_R2T                       = 0x00000008,
+       ICF_WITHIN_COMMAND_RECOVERY             = 0x00000010,
+       ICF_CONTIG_MEMORY                       = 0x00000020,
+       ICF_ATTACHED_TO_RQUEUE                  = 0x00000040,
+       ICF_OOO_CMDSN                           = 0x00000080,
+       ICF_REJECT_FAIL_CONN                    = 0x00000100,
+};
+
+/*
+ * struct iscsi_cmd->i_state: per-command iSCSI state machine values.
+ * SEND_* states mean a PDU of that type is queued for transmission;
+ * the matching SENT_* state is entered once it has gone out.
+ */
+enum cmd_i_state_table {
+       ISTATE_NO_STATE                 = 0,
+       ISTATE_NEW_CMD                  = 1,
+       ISTATE_DEFERRED_CMD             = 2,
+       ISTATE_UNSOLICITED_DATA         = 3,
+       ISTATE_RECEIVE_DATAOUT          = 4,
+       ISTATE_RECEIVE_DATAOUT_RECOVERY = 5,
+       ISTATE_RECEIVED_LAST_DATAOUT    = 6,
+       ISTATE_WITHIN_DATAOUT_RECOVERY  = 7,
+       ISTATE_IN_CONNECTION_RECOVERY   = 8,
+       ISTATE_RECEIVED_TASKMGT         = 9,
+       ISTATE_SEND_ASYNCMSG            = 10,
+       ISTATE_SENT_ASYNCMSG            = 11,
+       ISTATE_SEND_DATAIN              = 12,
+       ISTATE_SEND_LAST_DATAIN         = 13,
+       ISTATE_SENT_LAST_DATAIN         = 14,
+       ISTATE_SEND_LOGOUTRSP           = 15,
+       ISTATE_SENT_LOGOUTRSP           = 16,
+       ISTATE_SEND_NOPIN               = 17,
+       ISTATE_SENT_NOPIN               = 18,
+       ISTATE_SEND_REJECT              = 19,
+       ISTATE_SENT_REJECT              = 20,
+       ISTATE_SEND_R2T                 = 21,
+       ISTATE_SENT_R2T                 = 22,
+       ISTATE_SEND_R2T_RECOVERY        = 23,
+       ISTATE_SENT_R2T_RECOVERY        = 24,
+       ISTATE_SEND_LAST_R2T            = 25,
+       ISTATE_SENT_LAST_R2T            = 26,
+       ISTATE_SEND_LAST_R2T_RECOVERY   = 27,
+       ISTATE_SENT_LAST_R2T_RECOVERY   = 28,
+       ISTATE_SEND_STATUS              = 29,
+       ISTATE_SEND_STATUS_BROKEN_PC    = 30,
+       ISTATE_SENT_STATUS              = 31,
+       ISTATE_SEND_STATUS_RECOVERY     = 32,
+       ISTATE_SENT_STATUS_RECOVERY     = 33,
+       ISTATE_SEND_TASKMGTRSP          = 34,
+       ISTATE_SENT_TASKMGTRSP          = 35,
+       ISTATE_SEND_TEXTRSP             = 36,
+       ISTATE_SENT_TEXTRSP             = 37,
+       ISTATE_SEND_NOPIN_WANT_RESPONSE = 38,
+       ISTATE_SENT_NOPIN_WANT_RESPONSE = 39,
+       ISTATE_SEND_NOPIN_NO_RESPONSE   = 40,
+       ISTATE_REMOVE                   = 41,
+       ISTATE_FREE                     = 42,
+};
+
+/* Used for iscsi_recover_cmdsn() return values */
+enum recover_cmdsn_ret_table {
+       CMDSN_ERROR_CANNOT_RECOVER      = -1,
+       CMDSN_NORMAL_OPERATION          = 0,
+       CMDSN_LOWER_THAN_EXP            = 1,
+       CMDSN_HIGHER_THAN_EXP           = 2,
+};
+
+/* Used for iscsi_handle_immediate_data() return values */
+enum immedate_data_ret_table {
+       IMMEDIATE_DATA_CANNOT_RECOVER   = -1,
+       IMMEDIATE_DATA_NORMAL_OPERATION = 0,
+       IMMEDIATE_DATA_ERL1_CRC_FAILURE = 1,
+};
+
+/* Used for iscsi_decide_dataout_action() return values */
+enum dataout_action_ret_table {
+       DATAOUT_CANNOT_RECOVER          = -1,
+       DATAOUT_NORMAL                  = 0,
+       DATAOUT_SEND_R2T                = 1,
+       DATAOUT_SEND_TO_TRANSPORT       = 2,
+       DATAOUT_WITHIN_COMMAND_RECOVERY = 3,
+};
+
+/* Used for struct iscsi_node_auth->naf_flags (bitmask) */
+enum naf_flags_table {
+       NAF_USERID_SET                  = 0x01,
+       NAF_PASSWORD_SET                = 0x02,
+       NAF_USERID_IN_SET               = 0x04,
+       NAF_PASSWORD_IN_SET             = 0x08,
+};
+
+/* Used by various struct timer_list to manage iSCSI specific state (bitmask) */
+enum iscsi_timer_flags_table {
+       ISCSI_TF_RUNNING                = 0x01,
+       ISCSI_TF_STOP                   = 0x02,
+       ISCSI_TF_EXPIRED                = 0x04,
+};
+
+/* Used for struct iscsi_np->np_flags */
+enum np_flags_table {
+       NPF_IP_NETWORK          = 0x00,
+       NPF_SCTP_STRUCT_FILE    = 0x01 /* Bugfix */
+};
+
+/* Used for struct iscsi_np->np_thread_state */
+enum np_thread_state_table {
+       ISCSI_NP_THREAD_ACTIVE          = 1,
+       ISCSI_NP_THREAD_INACTIVE        = 2,
+       ISCSI_NP_THREAD_RESET           = 3,
+       ISCSI_NP_THREAD_SHUTDOWN        = 4,
+       ISCSI_NP_THREAD_EXIT            = 5,
+};
+
+/* Negotiated per-connection login keys (RFC-3720 operational parameters) */
+struct iscsi_conn_ops {
+       u8      HeaderDigest;                   /* [0,1] == [None,CRC32C] */
+       u8      DataDigest;                     /* [0,1] == [None,CRC32C] */
+       u32     MaxRecvDataSegmentLength;       /* [512..2**24-1] */
+       u8      OFMarker;                       /* [0,1] == [No,Yes] */
+       u8      IFMarker;                       /* [0,1] == [No,Yes] */
+       u32     OFMarkInt;                      /* [1..65535] */
+       u32     IFMarkInt;                      /* [1..65535] */
+};
+
+/* Negotiated per-session login keys (RFC-3720 operational parameters) */
+struct iscsi_sess_ops {
+       char    InitiatorName[224];
+       char    InitiatorAlias[256];
+       char    TargetName[224];
+       char    TargetAlias[256];
+       char    TargetAddress[256];
+       u16     TargetPortalGroupTag;           /* [0..65535] */
+       u16     MaxConnections;                 /* [1..65535] */
+       u8      InitialR2T;                     /* [0,1] == [No,Yes] */
+       u8      ImmediateData;                  /* [0,1] == [No,Yes] */
+       u32     MaxBurstLength;                 /* [512..2**24-1] */
+       u32     FirstBurstLength;               /* [512..2**24-1] */
+       u16     DefaultTime2Wait;               /* [0..3600] */
+       u16     DefaultTime2Retain;             /* [0..3600] */
+       u16     MaxOutstandingR2T;              /* [1..65535] */
+       u8      DataPDUInOrder;                 /* [0,1] == [No,Yes] */
+       u8      DataSequenceInOrder;            /* [0,1] == [No,Yes] */
+       u8      ErrorRecoveryLevel;             /* [0..2] */
+       u8      SessionType;                    /* [0,1] == [Normal,Discovery]*/
+};
+
+/* Entry on a connection's immediate or response queue */
+struct iscsi_queue_req {
+       int                     state;
+       struct iscsi_cmd        *cmd;
+       struct list_head        qr_list;
+};
+
+/* Bookkeeping for a single RX/TX data transfer over the socket */
+struct iscsi_data_count {
+       int                     data_length;
+       int                     sync_and_steering;
+       enum data_count_type    type;
+       u32                     iov_count;
+       u32                     ss_iov_count;
+       u32                     ss_marker_count;
+       struct kvec             *iov;
+};
+
+/* Lists of negotiated login parameters and extra key=value responses */
+struct iscsi_param_list {
+       struct list_head        param_list;
+       struct list_head        extra_response_list;
+};
+
+/* State of one outstanding DATA-IN transfer for a command */
+struct iscsi_datain_req {
+       enum datain_req_comp_table dr_complete;
+       int                     generate_recovery_values;
+       enum datain_req_rec_table recovery;
+       u32                     begrun;
+       u32                     runlength;
+       u32                     data_length;
+       u32                     data_offset;
+       u32                     data_offset_end;
+       u32                     data_sn;
+       u32                     next_burst_len;
+       u32                     read_data_done;
+       u32                     seq_send_order;
+       struct list_head        dr_list;
+} ____cacheline_aligned;
+
+/* A command held back because its CmdSN arrived out of order */
+struct iscsi_ooo_cmdsn {
+       u16                     cid;
+       u32                     batch_count;
+       u32                     cmdsn;
+       u32                     exp_cmdsn;
+       struct iscsi_cmd        *cmd;
+       struct list_head        ooo_list;
+} ____cacheline_aligned;
+
+/* Parameters of a single Data-In PDU to build */
+struct iscsi_datain {
+       u8                      flags;
+       u32                     data_sn;
+       u32                     length;
+       u32                     offset;
+} ____cacheline_aligned;
+
+/* One R2T (Ready To Transfer) issued for a WRITE command */
+struct iscsi_r2t {
+       int                     seq_complete;
+       int                     recovery_r2t;
+       int                     sent_r2t;
+       u32                     r2t_sn;
+       u32                     offset;
+       u32                     targ_xfer_tag;
+       u32                     xfer_len;
+       struct list_head        r2t_list;
+} ____cacheline_aligned;
+
+/*
+ * Per-command descriptor for the iSCSI target.  Embeds the generic TCM
+ * struct se_cmd (recovered via container_of() in the fabric callbacks).
+ */
+struct iscsi_cmd {
+       enum iscsi_timer_flags_table dataout_timer_flags;
+       /* DataOUT timeout retries */
+       u8                      dataout_timeout_retries;
+       /* Within command recovery count */
+       u8                      error_recovery_count;
+       /* iSCSI dependent state for out or order CmdSNs */
+       enum cmd_i_state_table  deferred_i_state;
+       /* iSCSI dependent state */
+       enum cmd_i_state_table  i_state;
+       /* Command is an immediate command (ISCSI_OP_IMMEDIATE set) */
+       u8                      immediate_cmd;
+       /* Immediate data present */
+       u8                      immediate_data;
+       /* iSCSI Opcode */
+       u8                      iscsi_opcode;
+       /* iSCSI Response Code */
+       u8                      iscsi_response;
+       /* Logout reason when iscsi_opcode == ISCSI_INIT_LOGOUT_CMND */
+       u8                      logout_reason;
+       /* Logout response code when iscsi_opcode == ISCSI_INIT_LOGOUT_CMND */
+       u8                      logout_response;
+       /* MaxCmdSN has been incremented */
+       u8                      maxcmdsn_inc;
+       /* Immediate Unsolicited Dataout */
+       u8                      unsolicited_data;
+       /* CID contained in logout PDU when opcode == ISCSI_INIT_LOGOUT_CMND */
+       u16                     logout_cid;
+       /* Command flags */
+       enum cmd_flags_table    cmd_flags;
+       /* Initiator Task Tag assigned from Initiator */
+       u32                     init_task_tag;
+       /* Target Transfer Tag assigned from Target */
+       u32                     targ_xfer_tag;
+       /* CmdSN assigned from Initiator */
+       u32                     cmd_sn;
+       /* ExpStatSN assigned from Initiator */
+       u32                     exp_stat_sn;
+       /* StatSN assigned to this ITT */
+       u32                     stat_sn;
+       /* DataSN Counter */
+       u32                     data_sn;
+       /* R2TSN Counter */
+       u32                     r2t_sn;
+       /* Last DataSN acknowledged via DataAck SNACK */
+       u32                     acked_data_sn;
+       /* Used for echoing NOPOUT ping data */
+       u32                     buf_ptr_size;
+       /* Used to store DataDigest */
+       u32                     data_crc;
+       /* Total size in bytes associated with command */
+       u32                     data_length;
+       /* Counter for MaxOutstandingR2T */
+       u32                     outstanding_r2ts;
+       /* Next R2T Offset when DataSequenceInOrder=Yes */
+       u32                     r2t_offset;
+       /* Iovec current and orig count for iscsi_cmd->iov_data */
+       u32                     iov_data_count;
+       u32                     orig_iov_data_count;
+       /* Number of miscellaneous iovecs used for IP stack calls */
+       u32                     iov_misc_count;
+       /* Number of struct iscsi_pdu in struct iscsi_cmd->pdu_list */
+       u32                     pdu_count;
+       /* Next struct iscsi_pdu to send in struct iscsi_cmd->pdu_list */
+       u32                     pdu_send_order;
+       /* Current struct iscsi_pdu in struct iscsi_cmd->pdu_list */
+       u32                     pdu_start;
+       /* Residual count reported back in the SCSI Response PDU */
+       u32                     residual_count;
+       /* Next struct iscsi_seq to send in struct iscsi_cmd->seq_list */
+       u32                     seq_send_order;
+       /* Number of struct iscsi_seq in struct iscsi_cmd->seq_list */
+       u32                     seq_count;
+       /* Current struct iscsi_seq in struct iscsi_cmd->seq_list */
+       u32                     seq_no;
+       /* Lowest offset in current DataOUT sequence */
+       u32                     seq_start_offset;
+       /* Highest offset in current DataOUT sequence */
+       u32                     seq_end_offset;
+       /* Total size in bytes received so far of READ data */
+       u32                     read_data_done;
+       /* Total size in bytes received so far of WRITE data */
+       u32                     write_data_done;
+       /* Counter for FirstBurstLength key */
+       u32                     first_burst_len;
+       /* Counter for MaxBurstLength key */
+       u32                     next_burst_len;
+       /* Transfer size used for IP stack calls */
+       u32                     tx_size;
+       /* Buffer used for various purposes */
+       void                    *buf_ptr;
+       /* See include/linux/dma-mapping.h */
+       enum dma_data_direction data_direction;
+       /* iSCSI PDU Header + CRC */
+       unsigned char           pdu[ISCSI_HDR_LEN + ISCSI_CRC_LEN];
+       /* Number of times struct iscsi_cmd is present in immediate queue */
+       atomic_t                immed_queue_count;
+       /* Number of times present in the response queue */
+       atomic_t                response_queue_count;
+       atomic_t                transport_sent;
+       spinlock_t              datain_lock;
+       spinlock_t              dataout_timeout_lock;
+       /* spinlock for protecting struct iscsi_cmd->i_state */
+       spinlock_t              istate_lock;
+       /* spinlock for adding within command recovery entries */
+       spinlock_t              error_lock;
+       /* spinlock for adding R2Ts */
+       spinlock_t              r2t_lock;
+       /* DataIN List */
+       struct list_head        datain_list;
+       /* R2T List */
+       struct list_head        cmd_r2t_list;
+       struct completion       reject_comp;
+       /* Timer for DataOUT */
+       struct timer_list       dataout_timer;
+       /* Iovecs for SCSI data payload RX/TX w/ kernel level sockets */
+       struct kvec             *iov_data;
+       /* Iovecs for miscellaneous purposes */
+#define ISCSI_MISC_IOVECS                      5
+       struct kvec             iov_misc[ISCSI_MISC_IOVECS];
+       /* Array of struct iscsi_pdu used for DataPDUInOrder=No */
+       struct iscsi_pdu        *pdu_list;
+       /* Current struct iscsi_pdu used for DataPDUInOrder=No */
+       struct iscsi_pdu        *pdu_ptr;
+       /* Array of struct iscsi_seq used for DataSequenceInOrder=No */
+       struct iscsi_seq        *seq_list;
+       /* Current struct iscsi_seq used for DataSequenceInOrder=No */
+       struct iscsi_seq        *seq_ptr;
+       /* TMR Request when iscsi_opcode == ISCSI_OP_SCSI_TMFUNC */
+       struct iscsi_tmr_req    *tmr_req;
+       /* Connection this command is alligient to */
+       struct iscsi_conn       *conn;
+       /* Pointer to connection recovery entry */
+       struct iscsi_conn_recovery *cr;
+       /* Session the command is part of,  used for connection recovery */
+       struct iscsi_session    *sess;
+       /* list_head for connection list */
+       struct list_head        i_list;
+       /* The TCM I/O descriptor that is accessed via container_of() */
+       struct se_cmd           se_cmd;
+       /* Sense buffer that will be mapped into outgoing status */
+#define ISCSI_SENSE_BUFFER_LEN          (TRANSPORT_SENSE_BUFFER + 2)
+       unsigned char           sense_buffer[ISCSI_SENSE_BUFFER_LEN];
+
+       /* Scatterlist covering the command's data memory */
+       struct scatterlist      *t_mem_sg;
+       u32                     t_mem_sg_nents;
+
+       /* Pad to align outgoing data segments; see pad_bytes usage in TX path */
+       u32                     padding;
+       u8                      pad_bytes[4];
+
+       /* Current position within the data scatterlist during transfer */
+       struct scatterlist      *first_data_sg;
+       u32                     first_data_sg_off;
+       u32                     kmapped_nents;
+
+}  ____cacheline_aligned;
+
+/* iSCSI-side state for a Task Management Request */
+struct iscsi_tmr_req {
+       /* TASK_REASSIGN in progress (single-bit bool bitfield) */
+       bool                    task_reassign:1;
+       u32                     ref_cmd_sn;
+       u32                     exp_data_sn;
+       struct iscsi_conn_recovery *conn_recovery;
+       /* Generic TCM TMR descriptor this request wraps */
+       struct se_tmr_req       *se_tmr_req;
+};
+
+struct iscsi_conn {
+       /* Authentication Successful for this connection */
+       u8                      auth_complete;
+       /* State connection is currently in */
+       u8                      conn_state;
+       u8                      conn_logout_reason;
+       u8                      network_transport;
+       enum iscsi_timer_flags_table nopin_timer_flags;
+       enum iscsi_timer_flags_table nopin_response_timer_flags;
+       u8                      tx_immediate_queue;
+       u8                      tx_response_queue;
+       /* Used to know what thread encountered a transport failure */
+       u8                      which_thread;
+       /* connection id assigned by the Initiator */
+       u16                     cid;
+       /* Remote TCP Port */
+       u16                     login_port;
+       int                     net_size;
+       u32                     auth_id;
+#define CONNFLAG_SCTP_STRUCT_FILE                      0x01
+       u32                     conn_flags;
+       /* Used for iscsi_tx_login_rsp() */
+       u32                     login_itt;
+       u32                     exp_statsn;
+       /* Per connection status sequence number */
+       u32                     stat_sn;
+       /* IFMarkInt's Current Value */
+       u32                     if_marker;
+       /* OFMarkInt's Current Value */
+       u32                     of_marker;
+       /* Used for calculating OFMarker offset to next PDU */
+       u32                     of_marker_offset;
+       /* Complete Bad PDU for sending reject */
+       unsigned char           bad_hdr[ISCSI_HDR_LEN];
+#define IPV6_ADDRESS_SPACE                             48
+       unsigned char           login_ip[IPV6_ADDRESS_SPACE];
+       int                     conn_usage_count;
+       int                     conn_waiting_on_uc;
+       atomic_t                check_immediate_queue;
+       atomic_t                conn_logout_remove;
+       atomic_t                connection_exit;
+       atomic_t                connection_recovery;
+       atomic_t                connection_reinstatement;
+       atomic_t                connection_wait;
+       atomic_t                connection_wait_rcfr;
+       atomic_t                sleep_on_conn_wait_comp;
+       atomic_t                transport_failed;
+       struct completion       conn_post_wait_comp;
+       struct completion       conn_wait_comp;
+       struct completion       conn_wait_rcfr_comp;
+       struct completion       conn_waiting_on_uc_comp;
+       struct completion       conn_logout_comp;
+       struct completion       tx_half_close_comp;
+       struct completion       rx_half_close_comp;
+       /* socket used by this connection */
+       struct socket           *sock;
+       struct timer_list       nopin_timer;
+       struct timer_list       nopin_response_timer;
+       struct timer_list       transport_timer;
+       /* Spinlock used for add/deleting cmd's from conn_cmd_list */
+       spinlock_t              cmd_lock;
+       spinlock_t              conn_usage_lock;
+       spinlock_t              immed_queue_lock;
+       spinlock_t              nopin_timer_lock;
+       spinlock_t              response_queue_lock;
+       spinlock_t              state_lock;
+       /* libcrypto RX and TX contexts for crc32c */
+       struct hash_desc        conn_rx_hash;
+       struct hash_desc        conn_tx_hash;
+       /* Used for scheduling TX and RX connection kthreads */
+       cpumask_var_t           conn_cpumask;
+       int                     conn_rx_reset_cpumask:1;
+       int                     conn_tx_reset_cpumask:1;
+       /* list_head of struct iscsi_cmd for this connection */
+       struct list_head        conn_cmd_list;
+       struct list_head        immed_queue_list;
+       struct list_head        response_queue_list;
+       struct iscsi_conn_ops   *conn_ops;
+       struct iscsi_param_list *param_list;
+       /* Used for per connection auth state machine */
+       void                    *auth_protocol;
+       struct iscsi_login_thread_s *login_thread;
+       struct iscsi_portal_group *tpg;
+       /* Pointer to parent session */
+       struct iscsi_session    *sess;
+       /* Pointer to thread_set in use for this conn's threads */
+       struct iscsi_thread_set *thread_set;
+       /* list_head for session connection list */
+       struct list_head        conn_list;
+} ____cacheline_aligned;
+
+/*
+ * Holds commands of a failed connection while they await reallegiance to
+ * a new connection; linked into iscsi_session->cr_active_list or
+ * cr_inactive_list via cr_list.
+ */
+struct iscsi_conn_recovery {
+       /* CID of the failed connection being recovered */
+       u16                     cid;
+       /* presumably the number of commands on conn_recovery_cmd_list -- verify */
+       u32                     cmd_count;
+       u32                     maxrecvdatasegmentlength;
+       int                     ready_for_reallegiance;
+       struct list_head        conn_recovery_cmd_list;
+       spinlock_t              conn_recovery_cmd_lock;
+       struct timer_list       time2retain_timer;
+       /* Session this recovery context belongs to */
+       struct iscsi_session    *sess;
+       /* list_head for session cr_active_list/cr_inactive_list */
+       struct list_head        cr_list;
+}  ____cacheline_aligned;
+
+/*
+ * Per-session state; reached from each connection via iscsi_conn->sess
+ * and bridged to the generic target core through se_sess.
+ */
+struct iscsi_session {
+       u8                      initiator_vendor;
+       /* Initiator Session ID (ISID) */
+       u8                      isid[6];
+       enum iscsi_timer_flags_table time2retain_timer_flags;
+       u8                      version_active;
+       u16                     cid_called;
+       u16                     conn_recovery_count;
+       u16                     tsih;
+       /* state session is currently in */
+       u32                     session_state;
+       /* session wide counter: initiator assigned task tag */
+       u32                     init_task_tag;
+       /* session wide counter: target assigned task tag */
+       u32                     targ_xfer_tag;
+       u32                     cmdsn_window;
+
+       /* protects cmdsn values */
+       struct mutex            cmdsn_mutex;
+       /* session wide counter: expected command sequence number */
+       u32                     exp_cmd_sn;
+       /* session wide counter: maximum allowed command sequence number */
+       u32                     max_cmd_sn;
+       struct list_head        sess_ooo_cmdsn_list;
+
+       /* LIO specific session ID */
+       u32                     sid;
+       char                    auth_type[8];
+       /* unique within the target */
+       int                     session_index;
+       /* Used for session reference counting */
+       int                     session_usage_count;
+       int                     session_waiting_on_uc;
+       /* statistics counters, protected by session_stats_lock */
+       u32                     cmd_pdus;
+       u32                     rsp_pdus;
+       u64                     tx_data_octets;
+       u64                     rx_data_octets;
+       u32                     conn_digest_errors;
+       u32                     conn_timeout_errors;
+       u64                     creation_time;
+       spinlock_t              session_stats_lock;
+       /* Number of active connections */
+       atomic_t                nconn;
+       atomic_t                session_continuation;
+       atomic_t                session_fall_back_to_erl0;
+       atomic_t                session_logout;
+       atomic_t                session_reinstatement;
+       atomic_t                session_stop_active;
+       atomic_t                sleep_on_sess_wait_comp;
+       atomic_t                transport_wait_cmds;
+       /* connection list */
+       struct list_head        sess_conn_list;
+       /* active/inactive struct iscsi_conn_recovery contexts */
+       struct list_head        cr_active_list;
+       struct list_head        cr_inactive_list;
+       spinlock_t              conn_lock;
+       spinlock_t              cr_a_lock;
+       spinlock_t              cr_i_lock;
+       spinlock_t              session_usage_lock;
+       spinlock_t              ttt_lock;
+       struct completion       async_msg_comp;
+       struct completion       reinstatement_comp;
+       struct completion       session_wait_comp;
+       struct completion       session_waiting_on_uc_comp;
+       struct timer_list       time2retain_timer;
+       struct iscsi_sess_ops   *sess_ops;
+       /* Backing TCM session */
+       struct se_session       *se_sess;
+       struct iscsi_portal_group *tpg;
+} ____cacheline_aligned;
+
+/*
+ * Transient state used while a login exchange is in progress.  req/rsp
+ * appear to point at the current request/response PDU headers and
+ * req_buf/rsp_buf at their data segments -- assumption from naming;
+ * verify against the login code.
+ */
+struct iscsi_login {
+       u8 auth_complete;
+       u8 checked_for_existing;
+       u8 current_stage;
+       u8 leading_connection;
+       u8 first_request;
+       u8 version_min;
+       u8 version_max;
+       char isid[6];
+       u32 cmd_sn;
+       u32 init_task_tag;
+       u32 initial_exp_statsn;
+       u32 rsp_length;
+       u16 cid;
+       u16 tsih;
+       char *req;
+       char *rsp;
+       char *req_buf;
+       char *rsp_buf;
+} ____cacheline_aligned;
+
+/*
+ * Tunable per initiator-node attributes; embedded in struct
+ * iscsi_node_acl, with nacl pointing back at the owner.
+ */
+struct iscsi_node_attrib {
+       /* units of the *_timeout fields are not visible here -- verify (likely seconds) */
+       u32                     dataout_timeout;
+       u32                     dataout_timeout_retries;
+       u32                     default_erl;
+       u32                     nopin_timeout;
+       u32                     nopin_response_timeout;
+       u32                     random_datain_pdu_offsets;
+       u32                     random_datain_seq_offsets;
+       u32                     random_r2t_offsets;
+       u32                     tmr_cold_reset;
+       u32                     tmr_warm_reset;
+       /* Back-pointer to the owning node ACL */
+       struct iscsi_node_acl *nacl;
+};
+
+struct se_dev_entry_s;
+
+/*
+ * Authentication credentials for an initiator node, including the
+ * mutual (target-to-initiator) userid/password pair.
+ */
+struct iscsi_node_auth {
+       enum naf_flags_table    naf_flags;
+       /* Non-zero when the initiator requires target (mutual) authentication */
+       int                     authenticate_target;
+       /* Used for iscsit_global->discovery_auth,
+        * set to zero (auth disabled) by default */
+       int                     enforce_discovery_auth;
+#define MAX_USER_LEN                           256
+#define MAX_PASS_LEN                           256
+       char                    userid[MAX_USER_LEN];
+       char                    password[MAX_PASS_LEN];
+       char                    userid_mutual[MAX_USER_LEN];
+       char                    password_mutual[MAX_PASS_LEN];
+};
+
+#include "iscsi_target_stat.h"
+
+/* configfs groups exposing per-node session and connection statistics. */
+struct iscsi_node_stat_grps {
+       struct config_group     iscsi_sess_stats_group;
+       struct config_group     iscsi_conn_stats_group;
+};
+
+/*
+ * Initiator node ACL: iSCSI-specific attributes, auth credentials and
+ * statistic groups wrapped around the generic se_node_acl.
+ */
+struct iscsi_node_acl {
+       struct iscsi_node_attrib node_attrib;
+       struct iscsi_node_auth  node_auth;
+       struct iscsi_node_stat_grps node_stat_grps;
+       struct se_node_acl      se_node_acl;
+};
+
+#define NODE_STAT_GRPS(nacl)   (&(nacl)->node_stat_grps)
+
+#define ISCSI_NODE_ATTRIB(t)   (&(t)->node_attrib)
+#define ISCSI_NODE_AUTH(t)     (&(t)->node_auth)
+
+/* Tunable attributes of a target portal group; tpg points back to the owner. */
+struct iscsi_tpg_attrib {
+       u32                     authentication;
+       u32                     login_timeout;
+       u32                     netif_timeout;
+       u32                     generate_node_acls;
+       u32                     cache_dynamic_acls;
+       u32                     default_cmdsn_depth;
+       u32                     demo_mode_write_protect;
+       u32                     prod_mode_write_protect;
+       /* Back-pointer to the owning TPG */
+       struct iscsi_portal_group *tpg;
+};
+
+/*
+ * Network portal: a listening socket (np_socket/np_sockaddr/np_port) plus
+ * the kthread (np_thread) that services it.
+ */
+struct iscsi_np {
+       int                     np_network_transport;
+       int                     np_ip_proto;
+       int                     np_sock_type;
+       enum np_thread_state_table np_thread_state;
+       enum iscsi_timer_flags_table np_login_timer_flags;
+       /* presumably the number of TPGs exporting this portal -- verify */
+       u32                     np_exports;
+       enum np_flags_table     np_flags;
+       unsigned char           np_ip[IPV6_ADDRESS_SPACE];
+       u16                     np_port;
+       spinlock_t              np_thread_lock;
+       struct completion       np_restart_comp;
+       struct socket           *np_socket;
+       struct __kernel_sockaddr_storage np_sockaddr;
+       struct task_struct      *np_thread;
+       struct timer_list       np_login_timer;
+       /* TPG currently processing a login on this portal, if any */
+       struct iscsi_portal_group *np_login_tpg;
+       struct list_head        np_list;
+} ____cacheline_aligned;
+
+/*
+ * Association between a network portal (tpg_np) and a portal group (tpg);
+ * may be the child of another association via tpg_np_parent.
+ */
+struct iscsi_tpg_np {
+       struct iscsi_np         *tpg_np;
+       struct iscsi_portal_group *tpg;
+       struct iscsi_tpg_np     *tpg_np_parent;
+       struct list_head        tpg_np_list;
+       struct list_head        tpg_np_child_list;
+       struct list_head        tpg_np_parent_list;
+       struct se_tpg_np        se_tpg_np;
+       /* protects tpg_np_parent / tpg_np_parent_list */
+       spinlock_t              tpg_np_parent_lock;
+};
+
+/*
+ * iSCSI Target Portal Group (TPG); owned by an iscsi_tiqn (tpg_tiqn) and
+ * bridged to the generic target core through tpg_se_tpg.
+ */
+struct iscsi_portal_group {
+       unsigned char           tpg_chap_id;
+       /* TPG State */
+       enum tpg_state_table    tpg_state;
+       /* Target Portal Group Tag */
+       u16                     tpgt;
+       /* Id assigned to target sessions */
+       u16                     ntsih;
+       /* Number of active sessions */
+       u32                     nsessions;
+       /* Number of Network Portals available for this TPG */
+       u32                     num_tpg_nps;
+       /* Per TPG LIO specific session ID. */
+       u32                     sid;
+       /* Spinlock for adding/removing Network Portals */
+       spinlock_t              tpg_np_lock;
+       spinlock_t              tpg_state_lock;
+       struct se_portal_group tpg_se_tpg;
+       struct mutex            tpg_access_lock;
+       struct mutex            np_login_lock;
+       struct iscsi_tpg_attrib tpg_attrib;
+       /* Pointer to default list of iSCSI parameters for TPG */
+       struct iscsi_param_list *param_list;
+       /* Owning target node */
+       struct iscsi_tiqn       *tpg_tiqn;
+       struct list_head        tpg_gnp_list;
+       /* list_head for iscsi_tiqn->tiqn_tpg_list */
+       struct list_head        tpg_list;
+} ____cacheline_aligned;
+
+#define ISCSI_TPG_C(c)         ((struct iscsi_portal_group *)(c)->tpg)
+#define ISCSI_TPG_LUN(c, l)  ((iscsi_tpg_list_t *)(c)->tpg->tpg_lun_list_t[l])
+#define ISCSI_TPG_S(s)         ((struct iscsi_portal_group *)(s)->tpg)
+#define ISCSI_TPG_ATTRIB(t)    (&(t)->tpg_attrib)
+#define SE_TPG(tpg)            (&(tpg)->tpg_se_tpg)
+
+/* configfs groups exposing per-target (WWN/IQN) statistics. */
+struct iscsi_wwn_stat_grps {
+       struct config_group     iscsi_stat_group;
+       struct config_group     iscsi_instance_group;
+       struct config_group     iscsi_sess_err_group;
+       struct config_group     iscsi_tgt_attr_group;
+       struct config_group     iscsi_login_stats_group;
+       struct config_group     iscsi_logout_stats_group;
+};
+
+/*
+ * An iSCSI Target Node, identified by its IQN string; owns its portal
+ * groups (tiqn_tpg_list) and the per-target statistic counters.
+ */
+struct iscsi_tiqn {
+#define ISCSI_IQN_LEN                          224
+       unsigned char           tiqn[ISCSI_IQN_LEN];
+       enum tiqn_state_table   tiqn_state;
+       int                     tiqn_access_count;
+       u32                     tiqn_active_tpgs;
+       u32                     tiqn_ntpgs;
+       u32                     tiqn_num_tpg_nps;
+       u32                     tiqn_nsessions;
+       /* global list membership */
+       struct list_head        tiqn_list;
+       /* list of struct iscsi_portal_group under this target */
+       struct list_head        tiqn_tpg_list;
+       spinlock_t              tiqn_state_lock;
+       spinlock_t              tiqn_tpg_lock;
+       struct se_wwn           tiqn_wwn;
+       struct iscsi_wwn_stat_grps tiqn_stat_grps;
+       int                     tiqn_index;
+       struct iscsi_sess_err_stats  sess_err_stats;
+       struct iscsi_login_stats     login_stats;
+       struct iscsi_logout_stats    logout_stats;
+} ____cacheline_aligned;
+
+#define WWN_STAT_GRPS(tiqn)    (&(tiqn)->tiqn_stat_grps)
+
+/*
+ * Module-wide state for the iSCSI target: thread-set bookkeeping plus the
+ * special discovery session TPG and ACL.
+ */
+struct iscsit_global {
+       /* In core shutdown */
+       u32                     in_shutdown;
+       u32                     active_ts;
+       /* Unique identifier used for the authentication daemon */
+       u32                     auth_id;
+       u32                     inactive_ts;
+       /* Thread Set bitmap count */
+       int                     ts_bitmap_count;
+       /* Thread Set bitmap pointer */
+       unsigned long           *ts_bitmap;
+       /* Used for iSCSI discovery session authentication */
+       struct iscsi_node_acl   discovery_acl;
+       struct iscsi_portal_group       *discovery_tpg;
+};
+
+#endif /* ISCSI_TARGET_CORE_H */
diff --git a/drivers/target/iscsi/iscsi_target_datain_values.c b/drivers/target/iscsi/iscsi_target_datain_values.c
new file mode 100644 (file)
index 0000000..8c04951
--- /dev/null
@@ -0,0 +1,531 @@
+/*******************************************************************************
+ * This file contains the iSCSI Target DataIN value generation functions.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <scsi/iscsi_proto.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_seq_pdu_list.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+#include "iscsi_target_datain_values.h"
+
+/* Allocate a zeroed struct iscsi_datain_req from the lio_dr_cache slab. */
+struct iscsi_datain_req *iscsit_allocate_datain_req(void)
+{
+       struct iscsi_datain_req *req = kmem_cache_zalloc(lio_dr_cache, GFP_ATOMIC);
+
+       if (!req) {
+               pr_err("Unable to allocate memory for"
+                               " struct iscsi_datain_req\n");
+               return NULL;
+       }
+       INIT_LIST_HEAD(&req->dr_list);
+
+       return req;
+}
+
+/* Queue @dr at the tail of @cmd's datain list, under cmd->datain_lock. */
+void iscsit_attach_datain_req(struct iscsi_cmd *cmd, struct iscsi_datain_req *dr)
+{
+       spin_lock(&cmd->datain_lock);
+       list_add_tail(&dr->dr_list, &cmd->datain_list);
+       spin_unlock(&cmd->datain_lock);
+}
+
+/* Unlink @dr from @cmd's datain list and return it to the slab cache. */
+void iscsit_free_datain_req(struct iscsi_cmd *cmd, struct iscsi_datain_req *dr)
+{
+       spin_lock(&cmd->datain_lock);
+       list_del(&dr->dr_list);
+       spin_unlock(&cmd->datain_lock);
+
+       /* freed outside the lock; @dr is already unreachable via the list */
+       kmem_cache_free(lio_dr_cache, dr);
+}
+
+/* Release every outstanding datain_req still attached to @cmd. */
+void iscsit_free_all_datain_reqs(struct iscsi_cmd *cmd)
+{
+       struct iscsi_datain_req *cur, *tmp;
+
+       spin_lock(&cmd->datain_lock);
+       list_for_each_entry_safe(cur, tmp, &cmd->datain_list, dr_list) {
+               list_del(&cur->dr_list);
+               kmem_cache_free(lio_dr_cache, cur);
+       }
+       spin_unlock(&cmd->datain_lock);
+}
+
+/*
+ * Return the oldest (head) datain_req queued on @cmd, or NULL (with an
+ * error message) if none are queued.  Does not take cmd->datain_lock;
+ * callers must ensure the list cannot change underneath them.
+ */
+struct iscsi_datain_req *iscsit_get_datain_req(struct iscsi_cmd *cmd)
+{
+       if (list_empty(&cmd->datain_list)) {
+               pr_err("cmd->datain_list is empty for ITT:"
+                       " 0x%08x\n", cmd->init_task_tag);
+               return NULL;
+       }
+
+       /* list_first_entry() replaces a list_for_each_entry() loop that
+        * immediately broke out after the first element */
+       return list_first_entry(&cmd->datain_list, struct iscsi_datain_req,
+                               dr_list);
+}
+
+/*
+ *     For Normal and Recovery DataSequenceInOrder=Yes and DataPDUInOrder=Yes.
+ *
+ *     Fills in *datain (length/offset/flags/data_sn) for the next DataIN
+ *     PDU, advancing the running burst/offset counters either on the
+ *     command (normal path) or on the recovery request (dr->recovery).
+ *     Returns the active datain_req, or NULL on error.
+ */
+static struct iscsi_datain_req *iscsit_set_datain_values_yes_and_yes(
+       struct iscsi_cmd *cmd,
+       struct iscsi_datain *datain)
+{
+       u32 next_burst_len, read_data_done, read_data_left;
+       struct iscsi_conn *conn = cmd->conn;
+       struct iscsi_datain_req *dr;
+
+       dr = iscsit_get_datain_req(cmd);
+       if (!dr)
+               return NULL;
+
+       /* First pass for a recovery request: recompute the starting
+        * counters before generating any values */
+       if (dr->recovery && dr->generate_recovery_values) {
+               if (iscsit_create_recovery_datain_values_datasequenceinorder_yes(
+                                       cmd, dr) < 0)
+                       return NULL;
+
+               dr->generate_recovery_values = 0;
+       }
+
+       /* Normal transfers track progress on the command; recovery
+        * transfers track it on the datain_req */
+       next_burst_len = (!dr->recovery) ?
+                       cmd->next_burst_len : dr->next_burst_len;
+       read_data_done = (!dr->recovery) ?
+                       cmd->read_data_done : dr->read_data_done;
+
+       read_data_left = (cmd->data_length - read_data_done);
+       if (!read_data_left) {
+               pr_err("ITT: 0x%08x read_data_left is zero!\n",
+                               cmd->init_task_tag);
+               return NULL;
+       }
+
+       /* Remaining payload fits in one PDU and inside the current burst:
+        * emit the final DataIN carrying status */
+       if ((read_data_left <= conn->conn_ops->MaxRecvDataSegmentLength) &&
+           (read_data_left <= (conn->sess->sess_ops->MaxBurstLength -
+            next_burst_len))) {
+               datain->length = read_data_left;
+
+               datain->flags |= (ISCSI_FLAG_CMD_FINAL | ISCSI_FLAG_DATA_STATUS);
+               if (conn->sess->sess_ops->ErrorRecoveryLevel > 0)
+                       datain->flags |= ISCSI_FLAG_DATA_ACK;
+       } else {
+               /* Either continue the burst with a full-size PDU, or close
+                * the burst with whatever room MaxBurstLength leaves */
+               if ((next_burst_len +
+                    conn->conn_ops->MaxRecvDataSegmentLength) <
+                    conn->sess->sess_ops->MaxBurstLength) {
+                       datain->length =
+                               conn->conn_ops->MaxRecvDataSegmentLength;
+                       next_burst_len += datain->length;
+               } else {
+                       datain->length = (conn->sess->sess_ops->MaxBurstLength -
+                                         next_burst_len);
+                       next_burst_len = 0;
+
+                       datain->flags |= ISCSI_FLAG_CMD_FINAL;
+                       if (conn->sess->sess_ops->ErrorRecoveryLevel > 0)
+                               datain->flags |= ISCSI_FLAG_DATA_ACK;
+               }
+       }
+
+       datain->data_sn = (!dr->recovery) ? cmd->data_sn++ : dr->data_sn++;
+       datain->offset = read_data_done;
+
+       if (!dr->recovery) {
+               cmd->next_burst_len = next_burst_len;
+               cmd->read_data_done += datain->length;
+       } else {
+               dr->next_burst_len = next_burst_len;
+               dr->read_data_done += datain->length;
+       }
+
+       if (!dr->recovery) {
+               if (datain->flags & ISCSI_FLAG_DATA_STATUS)
+                       dr->dr_complete = DATAIN_COMPLETE_NORMAL;
+
+               return dr;
+       }
+
+       /* Recovery path: mark completion either when the final PDU went
+        * out (no runlength) or when the requested run has been resent */
+       if (!dr->runlength) {
+               if (datain->flags & ISCSI_FLAG_DATA_STATUS) {
+                       dr->dr_complete =
+                           (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+                               DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+                               DATAIN_COMPLETE_CONNECTION_RECOVERY;
+               }
+       } else {
+               if ((dr->begrun + dr->runlength) == dr->data_sn) {
+                       dr->dr_complete =
+                           (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+                               DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+                               DATAIN_COMPLETE_CONNECTION_RECOVERY;
+               }
+       }
+
+       return dr;
+}
+
+/*
+ *     For Normal and Recovery DataSequenceInOrder=No and DataPDUInOrder=Yes.
+ *
+ *     Like the yes_and_yes variant, but burst state lives on the selected
+ *     struct iscsi_seq so sequences may be sent out of order
+ *     (seq_send_order selects which one).  Returns the datain_req, or
+ *     NULL on error.
+ */
+static struct iscsi_datain_req *iscsit_set_datain_values_no_and_yes(
+       struct iscsi_cmd *cmd,
+       struct iscsi_datain *datain)
+{
+       u32 offset, read_data_done, read_data_left, seq_send_order;
+       struct iscsi_conn *conn = cmd->conn;
+       struct iscsi_datain_req *dr;
+       struct iscsi_seq *seq;
+
+       dr = iscsit_get_datain_req(cmd);
+       if (!dr)
+               return NULL;
+
+       /* First pass for a recovery request: recompute starting counters */
+       if (dr->recovery && dr->generate_recovery_values) {
+               if (iscsit_create_recovery_datain_values_datasequenceinorder_no(
+                                       cmd, dr) < 0)
+                       return NULL;
+
+               dr->generate_recovery_values = 0;
+       }
+
+       /* Normal transfers track progress on the command; recovery
+        * transfers track it on the datain_req */
+       read_data_done = (!dr->recovery) ?
+                       cmd->read_data_done : dr->read_data_done;
+       seq_send_order = (!dr->recovery) ?
+                       cmd->seq_send_order : dr->seq_send_order;
+
+       read_data_left = (cmd->data_length - read_data_done);
+       if (!read_data_left) {
+               pr_err("ITT: 0x%08x read_data_left is zero!\n",
+                               cmd->init_task_tag);
+               return NULL;
+       }
+
+       /* Select the sequence to transmit next */
+       seq = iscsit_get_seq_holder_for_datain(cmd, seq_send_order);
+       if (!seq)
+               return NULL;
+
+       seq->sent = 1;
+
+       if (!dr->recovery && !seq->next_burst_len)
+               seq->first_datasn = cmd->data_sn;
+
+       offset = (seq->offset + seq->next_burst_len);
+
+       /* Last PDU of the whole transfer, of this sequence's burst, or a
+        * full-size PDU continuing the burst */
+       if ((offset + conn->conn_ops->MaxRecvDataSegmentLength) >=
+            cmd->data_length) {
+               datain->length = (cmd->data_length - offset);
+               datain->offset = offset;
+
+               datain->flags |= ISCSI_FLAG_CMD_FINAL;
+               if (conn->sess->sess_ops->ErrorRecoveryLevel > 0)
+                       datain->flags |= ISCSI_FLAG_DATA_ACK;
+
+               seq->next_burst_len = 0;
+               seq_send_order++;
+       } else {
+               if ((seq->next_burst_len +
+                    conn->conn_ops->MaxRecvDataSegmentLength) <
+                    conn->sess->sess_ops->MaxBurstLength) {
+                       datain->length =
+                               conn->conn_ops->MaxRecvDataSegmentLength;
+                       datain->offset = (seq->offset + seq->next_burst_len);
+
+                       seq->next_burst_len += datain->length;
+               } else {
+                       datain->length = (conn->sess->sess_ops->MaxBurstLength -
+                                         seq->next_burst_len);
+                       datain->offset = (seq->offset + seq->next_burst_len);
+
+                       datain->flags |= ISCSI_FLAG_CMD_FINAL;
+                       if (conn->sess->sess_ops->ErrorRecoveryLevel > 0)
+                               datain->flags |= ISCSI_FLAG_DATA_ACK;
+
+                       seq->next_burst_len = 0;
+                       seq_send_order++;
+               }
+       }
+
+       if ((read_data_done + datain->length) == cmd->data_length)
+               datain->flags |= ISCSI_FLAG_DATA_STATUS;
+
+       datain->data_sn = (!dr->recovery) ? cmd->data_sn++ : dr->data_sn++;
+       if (!dr->recovery) {
+               cmd->seq_send_order = seq_send_order;
+               cmd->read_data_done += datain->length;
+       } else {
+               dr->seq_send_order = seq_send_order;
+               dr->read_data_done += datain->length;
+       }
+
+       if (!dr->recovery) {
+               if (datain->flags & ISCSI_FLAG_CMD_FINAL)
+                       seq->last_datasn = datain->data_sn;
+               if (datain->flags & ISCSI_FLAG_DATA_STATUS)
+                       dr->dr_complete = DATAIN_COMPLETE_NORMAL;
+
+               return dr;
+       }
+
+       /* Recovery path: mark completion either when the final PDU went
+        * out (no runlength) or when the requested run has been resent */
+       if (!dr->runlength) {
+               if (datain->flags & ISCSI_FLAG_DATA_STATUS) {
+                       dr->dr_complete =
+                           (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+                               DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+                               DATAIN_COMPLETE_CONNECTION_RECOVERY;
+               }
+       } else {
+               if ((dr->begrun + dr->runlength) == dr->data_sn) {
+                       dr->dr_complete =
+                           (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+                               DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+                               DATAIN_COMPLETE_CONNECTION_RECOVERY;
+               }
+       }
+
+       return dr;
+}
+
+/*
+ *     For Normal and Recovery DataSequenceInOrder=Yes and DataPDUInOrder=No.
+ *
+ *     PDU descriptors come from the command's pre-built pdu list (via
+ *     iscsit_get_pdu_holder_for_seq()); the selected pdu's values are
+ *     copied into *datain.  Returns the datain_req, or NULL on error.
+ */
+static struct iscsi_datain_req *iscsit_set_datain_values_yes_and_no(
+       struct iscsi_cmd *cmd,
+       struct iscsi_datain *datain)
+{
+       u32 next_burst_len, read_data_done, read_data_left;
+       struct iscsi_conn *conn = cmd->conn;
+       struct iscsi_datain_req *dr;
+       struct iscsi_pdu *pdu;
+
+       dr = iscsit_get_datain_req(cmd);
+       if (!dr)
+               return NULL;
+
+       /* First pass for a recovery request: recompute starting counters */
+       if (dr->recovery && dr->generate_recovery_values) {
+               if (iscsit_create_recovery_datain_values_datasequenceinorder_yes(
+                                       cmd, dr) < 0)
+                       return NULL;
+
+               dr->generate_recovery_values = 0;
+       }
+
+       next_burst_len = (!dr->recovery) ?
+                       cmd->next_burst_len : dr->next_burst_len;
+       read_data_done = (!dr->recovery) ?
+                       cmd->read_data_done : dr->read_data_done;
+
+       read_data_left = (cmd->data_length - read_data_done);
+       if (!read_data_left) {
+               pr_err("ITT: 0x%08x read_data_left is zero!\n",
+                               cmd->init_task_tag);
+               /* NOTE(review): the other three variants return NULL here;
+                * confirm returning dr on zero read_data_left is intended */
+               return dr;
+       }
+
+       pdu = iscsit_get_pdu_holder_for_seq(cmd, NULL);
+       if (!pdu)
+               return dr;
+
+       if ((read_data_done + pdu->length) == cmd->data_length) {
+               pdu->flags |= (ISCSI_FLAG_CMD_FINAL | ISCSI_FLAG_DATA_STATUS);
+               if (conn->sess->sess_ops->ErrorRecoveryLevel > 0)
+                       pdu->flags |= ISCSI_FLAG_DATA_ACK;
+
+               next_burst_len = 0;
+       } else {
+               if ((next_burst_len + conn->conn_ops->MaxRecvDataSegmentLength) <
+                    conn->sess->sess_ops->MaxBurstLength)
+                       next_burst_len += pdu->length;
+               else {
+                       pdu->flags |= ISCSI_FLAG_CMD_FINAL;
+                       if (conn->sess->sess_ops->ErrorRecoveryLevel > 0)
+                               pdu->flags |= ISCSI_FLAG_DATA_ACK;
+
+                       next_burst_len = 0;
+               }
+       }
+
+       pdu->data_sn = (!dr->recovery) ? cmd->data_sn++ : dr->data_sn++;
+       if (!dr->recovery) {
+               cmd->next_burst_len = next_burst_len;
+               cmd->read_data_done += pdu->length;
+       } else {
+               dr->next_burst_len = next_burst_len;
+               dr->read_data_done += pdu->length;
+       }
+
+       /* Hand the selected pdu's values back to the caller */
+       datain->flags = pdu->flags;
+       datain->length = pdu->length;
+       datain->offset = pdu->offset;
+       datain->data_sn = pdu->data_sn;
+
+       if (!dr->recovery) {
+               if (datain->flags & ISCSI_FLAG_DATA_STATUS)
+                       dr->dr_complete = DATAIN_COMPLETE_NORMAL;
+
+               return dr;
+       }
+
+       /* Recovery path: mark completion either when the final PDU went
+        * out (no runlength) or when the requested run has been resent */
+       if (!dr->runlength) {
+               if (datain->flags & ISCSI_FLAG_DATA_STATUS) {
+                       dr->dr_complete =
+                           (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+                               DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+                               DATAIN_COMPLETE_CONNECTION_RECOVERY;
+               }
+       } else {
+               if ((dr->begrun + dr->runlength) == dr->data_sn) {
+                       dr->dr_complete =
+                           (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+                               DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+                               DATAIN_COMPLETE_CONNECTION_RECOVERY;
+               }
+       }
+
+       return dr;
+}
+
+/*
+ *     For Normal and Recovery DataSequenceInOrder=No and DataPDUInOrder=No.
+ *
+ *     Fully out-of-order: a sequence is chosen by seq_send_order and a
+ *     pdu descriptor within it by iscsit_get_pdu_holder_for_seq(); the
+ *     pdu's values are copied into *datain.  Returns the datain_req, or
+ *     NULL on error.
+ */
+static struct iscsi_datain_req *iscsit_set_datain_values_no_and_no(
+       struct iscsi_cmd *cmd,
+       struct iscsi_datain *datain)
+{
+       u32 read_data_done, read_data_left, seq_send_order;
+       struct iscsi_conn *conn = cmd->conn;
+       struct iscsi_datain_req *dr;
+       struct iscsi_pdu *pdu;
+       struct iscsi_seq *seq = NULL;
+
+       dr = iscsit_get_datain_req(cmd);
+       if (!dr)
+               return NULL;
+
+       /* First pass for a recovery request: recompute starting counters */
+       if (dr->recovery && dr->generate_recovery_values) {
+               if (iscsit_create_recovery_datain_values_datasequenceinorder_no(
+                                       cmd, dr) < 0)
+                       return NULL;
+
+               dr->generate_recovery_values = 0;
+       }
+
+       read_data_done = (!dr->recovery) ?
+                       cmd->read_data_done : dr->read_data_done;
+       seq_send_order = (!dr->recovery) ?
+                       cmd->seq_send_order : dr->seq_send_order;
+
+       read_data_left = (cmd->data_length - read_data_done);
+       if (!read_data_left) {
+               pr_err("ITT: 0x%08x read_data_left is zero!\n",
+                               cmd->init_task_tag);
+               return NULL;
+       }
+
+       /* Select the sequence, then a pdu descriptor within it */
+       seq = iscsit_get_seq_holder_for_datain(cmd, seq_send_order);
+       if (!seq)
+               return NULL;
+
+       seq->sent = 1;
+
+       if (!dr->recovery && !seq->next_burst_len)
+               seq->first_datasn = cmd->data_sn;
+
+       pdu = iscsit_get_pdu_holder_for_seq(cmd, seq);
+       if (!pdu)
+               return NULL;
+
+       /* All pdus of this sequence sent: close the burst and move on to
+        * the next sequence */
+       if (seq->pdu_send_order == seq->pdu_count) {
+               pdu->flags |= ISCSI_FLAG_CMD_FINAL;
+               if (conn->sess->sess_ops->ErrorRecoveryLevel > 0)
+                       pdu->flags |= ISCSI_FLAG_DATA_ACK;
+
+               seq->next_burst_len = 0;
+               seq_send_order++;
+       } else
+               seq->next_burst_len += pdu->length;
+
+       if ((read_data_done + pdu->length) == cmd->data_length)
+               pdu->flags |= ISCSI_FLAG_DATA_STATUS;
+
+       pdu->data_sn = (!dr->recovery) ? cmd->data_sn++ : dr->data_sn++;
+       if (!dr->recovery) {
+               cmd->seq_send_order = seq_send_order;
+               cmd->read_data_done += pdu->length;
+       } else {
+               dr->seq_send_order = seq_send_order;
+               dr->read_data_done += pdu->length;
+       }
+
+       /* Hand the selected pdu's values back to the caller */
+       datain->flags = pdu->flags;
+       datain->length = pdu->length;
+       datain->offset = pdu->offset;
+       datain->data_sn = pdu->data_sn;
+
+       if (!dr->recovery) {
+               if (datain->flags & ISCSI_FLAG_CMD_FINAL)
+                       seq->last_datasn = datain->data_sn;
+               if (datain->flags & ISCSI_FLAG_DATA_STATUS)
+                       dr->dr_complete = DATAIN_COMPLETE_NORMAL;
+
+               return dr;
+       }
+
+       /* Recovery path: mark completion either when the final PDU went
+        * out (no runlength) or when the requested run has been resent */
+       if (!dr->runlength) {
+               if (datain->flags & ISCSI_FLAG_DATA_STATUS) {
+                       dr->dr_complete =
+                           (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+                               DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+                               DATAIN_COMPLETE_CONNECTION_RECOVERY;
+               }
+       } else {
+               if ((dr->begrun + dr->runlength) == dr->data_sn) {
+                       dr->dr_complete =
+                           (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+                               DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+                               DATAIN_COMPLETE_CONNECTION_RECOVERY;
+               }
+       }
+
+       return dr;
+}
+
+/*
+ * Dispatch to the variant matching the session's negotiated
+ * DataSequenceInOrder / DataPDUInOrder settings; the four combinations
+ * are exhaustive.
+ */
+struct iscsi_datain_req *iscsit_get_datain_values(
+       struct iscsi_cmd *cmd,
+       struct iscsi_datain *datain)
+{
+       struct iscsi_sess_ops *ops = cmd->conn->sess->sess_ops;
+
+       if (ops->DataSequenceInOrder) {
+               if (ops->DataPDUInOrder)
+                       return iscsit_set_datain_values_yes_and_yes(cmd, datain);
+               else
+                       return iscsit_set_datain_values_yes_and_no(cmd, datain);
+       } else {
+               if (ops->DataPDUInOrder)
+                       return iscsit_set_datain_values_no_and_yes(cmd, datain);
+               else
+                       return iscsit_set_datain_values_no_and_no(cmd, datain);
+       }
+}
diff --git a/drivers/target/iscsi/iscsi_target_datain_values.h b/drivers/target/iscsi/iscsi_target_datain_values.h
new file mode 100644 (file)
index 0000000..646429a
--- /dev/null
@@ -0,0 +1,12 @@
#ifndef ISCSI_TARGET_DATAIN_VALUES_H
#define ISCSI_TARGET_DATAIN_VALUES_H

/* Allocation, attach and teardown of per-command DataIN request state. */
extern struct iscsi_datain_req *iscsit_allocate_datain_req(void);
extern void iscsit_attach_datain_req(struct iscsi_cmd *, struct iscsi_datain_req *);
extern void iscsit_free_datain_req(struct iscsi_cmd *, struct iscsi_datain_req *);
extern void iscsit_free_all_datain_reqs(struct iscsi_cmd *);
extern struct iscsi_datain_req *iscsit_get_datain_req(struct iscsi_cmd *);
/*
 * Dispatches to the iscsit_set_datain_values_* variant selected by the
 * session's DataSequenceInOrder / DataPDUInOrder settings.
 */
extern struct iscsi_datain_req *iscsit_get_datain_values(struct iscsi_cmd *,
			struct iscsi_datain *);

#endif   /*** ISCSI_TARGET_DATAIN_VALUES_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_device.c b/drivers/target/iscsi/iscsi_target_device.c
new file mode 100644 (file)
index 0000000..a19fa5e
--- /dev/null
@@ -0,0 +1,87 @@
+/*******************************************************************************
+ * This file contains the iSCSI Virtual Device and Disk Transport
+ * agnostic related functions.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <scsi/scsi_device.h>
+#include <target/target_core_base.h>
+#include <target/target_core_device.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+
+int iscsit_get_lun_for_tmr(
+       struct iscsi_cmd *cmd,
+       u64 lun)
+{
+       u32 unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun);
+
+       return transport_lookup_tmr_lun(&cmd->se_cmd, unpacked_lun);
+}
+
+int iscsit_get_lun_for_cmd(
+       struct iscsi_cmd *cmd,
+       unsigned char *cdb,
+       u64 lun)
+{
+       u32 unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun);
+
+       return transport_lookup_cmd_lun(&cmd->se_cmd, unpacked_lun);
+}
+
+void iscsit_determine_maxcmdsn(struct iscsi_session *sess)
+{
+       struct se_node_acl *se_nacl;
+
+       /*
+        * This is a discovery session, the single queue slot was already
+        * assigned in iscsi_login_zero_tsih().  Since only Logout and
+        * Text Opcodes are allowed during discovery we do not have to worry
+        * about the HBA's queue depth here.
+        */
+       if (sess->sess_ops->SessionType)
+               return;
+
+       se_nacl = sess->se_sess->se_node_acl;
+
+       /*
+        * This is a normal session, set the Session's CmdSN window to the
+        * struct se_node_acl->queue_depth.  The value in struct se_node_acl->queue_depth
+        * has already been validated as a legal value in
+        * core_set_queue_depth_for_node().
+        */
+       sess->cmdsn_window = se_nacl->queue_depth;
+       sess->max_cmd_sn = (sess->max_cmd_sn + se_nacl->queue_depth) - 1;
+}
+
+void iscsit_increment_maxcmdsn(struct iscsi_cmd *cmd, struct iscsi_session *sess)
+{
+       if (cmd->immediate_cmd || cmd->maxcmdsn_inc)
+               return;
+
+       cmd->maxcmdsn_inc = 1;
+
+       mutex_lock(&sess->cmdsn_mutex);
+       sess->max_cmd_sn += 1;
+       pr_debug("Updated MaxCmdSN to 0x%08x\n", sess->max_cmd_sn);
+       mutex_unlock(&sess->cmdsn_mutex);
+}
diff --git a/drivers/target/iscsi/iscsi_target_device.h b/drivers/target/iscsi/iscsi_target_device.h
new file mode 100644 (file)
index 0000000..bef1cad
--- /dev/null
@@ -0,0 +1,9 @@
#ifndef ISCSI_TARGET_DEVICE_H
#define ISCSI_TARGET_DEVICE_H

/* LUN unpacking + lookup through the target core (TMR and command paths). */
extern int iscsit_get_lun_for_tmr(struct iscsi_cmd *, u64);
extern int iscsit_get_lun_for_cmd(struct iscsi_cmd *, unsigned char *, u64);
/* CmdSN window sizing and MaxCmdSN advancement for a session. */
extern void iscsit_determine_maxcmdsn(struct iscsi_session *);
extern void iscsit_increment_maxcmdsn(struct iscsi_cmd *, struct iscsi_session *);

#endif /* ISCSI_TARGET_DEVICE_H */
diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c
new file mode 100644 (file)
index 0000000..b7ffc3c
--- /dev/null
@@ -0,0 +1,1004 @@
+/******************************************************************************
+ * This file contains error recovery level zero functions used by
+ * the iSCSI Target driver.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_seq_pdu_list.h"
+#include "iscsi_target_tq.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_erl2.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+
+/*
+ *     Used to set values in struct iscsi_cmd that iscsit_dataout_check_sequence()
+ *     checks against to determine a PDU's Offset+Length is within the current
+ *     DataOUT Sequence.  Used for DataSequenceInOrder=Yes only.
+ */
+void iscsit_set_dataout_sequence_values(
+       struct iscsi_cmd *cmd)
+{
+       struct iscsi_conn *conn = cmd->conn;
+       /*
+        * Still set seq_start_offset and seq_end_offset for Unsolicited
+        * DataOUT, even if DataSequenceInOrder=No.
+        */
+       if (cmd->unsolicited_data) {
+               cmd->seq_start_offset = cmd->write_data_done;
+               cmd->seq_end_offset = (cmd->write_data_done +
+                       (cmd->data_length >
+                        conn->sess->sess_ops->FirstBurstLength) ?
+                       conn->sess->sess_ops->FirstBurstLength : cmd->data_length);
+               return;
+       }
+
+       if (!conn->sess->sess_ops->DataSequenceInOrder)
+               return;
+
+       if (!cmd->seq_start_offset && !cmd->seq_end_offset) {
+               cmd->seq_start_offset = cmd->write_data_done;
+               cmd->seq_end_offset = (cmd->data_length >
+                       conn->sess->sess_ops->MaxBurstLength) ?
+                       (cmd->write_data_done +
+                       conn->sess->sess_ops->MaxBurstLength) : cmd->data_length;
+       } else {
+               cmd->seq_start_offset = cmd->seq_end_offset;
+               cmd->seq_end_offset = ((cmd->seq_end_offset +
+                       conn->sess->sess_ops->MaxBurstLength) >=
+                       cmd->data_length) ? cmd->data_length :
+                       (cmd->seq_end_offset +
+                        conn->sess->sess_ops->MaxBurstLength);
+       }
+}
+
/*
 * Pre-payload check run first in iscsit_check_pre_dataout().  Decides whether
 * a DataOUT PDU is acceptable while the command may be in within-command
 * recovery.  Returns DATAOUT_NORMAL to continue processing,
 * DATAOUT_CANNOT_RECOVER on a fatal condition, or the result of dumping the
 * payload when the PDU does not match the recovery state.
 */
static int iscsit_dataout_within_command_recovery_check(
	struct iscsi_cmd *cmd,
	unsigned char *buf)
{
	struct iscsi_conn *conn = cmd->conn;
	struct iscsi_data *hdr = (struct iscsi_data *) buf;
	u32 payload_length = ntoh24(hdr->dlength);

	/*
	 * We do the within-command recovery checks here as it is
	 * the first function called in iscsi_check_pre_dataout().
	 * Basically, if we are in within-command recovery and
	 * the PDU does not contain the offset the sequence needs,
	 * dump the payload.
	 *
	 * This only applies to DataPDUInOrder=Yes, for
	 * DataPDUInOrder=No we only re-request the failed PDU
	 * and check that all PDUs in a sequence are received
	 * upon end of sequence.
	 */
	if (conn->sess->sess_ops->DataSequenceInOrder) {
		if ((cmd->cmd_flags & ICF_WITHIN_COMMAND_RECOVERY) &&
		    (cmd->write_data_done != hdr->offset))
			goto dump;

		/* Expected offset arrived: recovery for this command is over. */
		cmd->cmd_flags &= ~ICF_WITHIN_COMMAND_RECOVERY;
	} else {
		struct iscsi_seq *seq;

		seq = iscsit_get_seq_holder(cmd, hdr->offset, payload_length);
		if (!seq)
			return DATAOUT_CANNOT_RECOVER;
		/*
		 * Set the struct iscsi_seq pointer to reuse later.
		 */
		cmd->seq_ptr = seq;

		if (conn->sess->sess_ops->DataPDUInOrder) {
			if ((seq->status ==
			     DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY) &&
			   ((seq->offset != hdr->offset) ||
			    (seq->data_sn != hdr->datasn)))
				goto dump;
		} else {
			if ((seq->status ==
			     DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY) &&
			    (seq->data_sn != hdr->datasn))
				goto dump;
		}

		if (seq->status == DATAOUT_SEQUENCE_COMPLETE)
			goto dump;

		/*
		 * NOTE(review): this condition is always true here — the
		 * DATAOUT_SEQUENCE_COMPLETE case jumped to dump above — so
		 * the status is unconditionally cleared.
		 */
		if (seq->status != DATAOUT_SEQUENCE_COMPLETE)
			seq->status = 0;
	}

	return DATAOUT_NORMAL;

dump:
	pr_err("Dumping DataOUT PDU Offset: %u Length: %d DataSN:"
		" 0x%08x\n", hdr->offset, payload_length, hdr->datasn);
	return iscsit_dump_data_payload(conn, payload_length, 1);
}
+
/*
 * Validate a DataOUT PDU belonging to the unsolicited data phase:
 *  - its Offset/Length must lie inside [seq_start_offset, seq_end_offset];
 *  - the running first-burst byte count must not exceed FirstBurstLength;
 *  - F-bit presence/absence must match end-of-burst / end-of-transfer.
 * Returns DATAOUT_NORMAL on success, DATAOUT_CANNOT_RECOVER otherwise.
 */
static int iscsit_dataout_check_unsolicited_sequence(
	struct iscsi_cmd *cmd,
	unsigned char *buf)
{
	u32 first_burst_len;
	struct iscsi_conn *conn = cmd->conn;
	struct iscsi_data *hdr = (struct iscsi_data *) buf;
	u32 payload_length = ntoh24(hdr->dlength);


	if ((hdr->offset < cmd->seq_start_offset) ||
	   ((hdr->offset + payload_length) > cmd->seq_end_offset)) {
		pr_err("Command ITT: 0x%08x with Offset: %u,"
		" Length: %u outside of Unsolicited Sequence %u:%u while"
		" DataSequenceInOrder=Yes.\n", cmd->init_task_tag,
		hdr->offset, payload_length, cmd->seq_start_offset,
			cmd->seq_end_offset);
		return DATAOUT_CANNOT_RECOVER;
	}

	first_burst_len = (cmd->first_burst_len + payload_length);

	if (first_burst_len > conn->sess->sess_ops->FirstBurstLength) {
		pr_err("Total %u bytes exceeds FirstBurstLength: %u"
			" for this Unsolicited DataOut Burst.\n",
			first_burst_len, conn->sess->sess_ops->FirstBurstLength);
		transport_send_check_condition_and_sense(&cmd->se_cmd,
				TCM_INCORRECT_AMOUNT_OF_DATA, 0);
		return DATAOUT_CANNOT_RECOVER;
	}

	/*
	 * Perform various MaxBurstLength and ISCSI_FLAG_CMD_FINAL sanity
	 * checks for the current Unsolicited DataOUT Sequence.
	 */
	if (hdr->flags & ISCSI_FLAG_CMD_FINAL) {
		/*
		 * Ignore ISCSI_FLAG_CMD_FINAL checks while DataPDUInOrder=No, end of
		 * sequence checks are handled in
		 * iscsit_dataout_datapduinorder_no_fbit().
		 */
		if (!conn->sess->sess_ops->DataPDUInOrder)
			goto out;

		/* F-bit set: burst must end at FirstBurstLength or ExpXferLen. */
		if ((first_burst_len != cmd->data_length) &&
		    (first_burst_len != conn->sess->sess_ops->FirstBurstLength)) {
			pr_err("Unsolicited non-immediate data"
			" received %u does not equal FirstBurstLength: %u, and"
			" does not equal ExpXferLen %u.\n", first_burst_len,
				conn->sess->sess_ops->FirstBurstLength,
				cmd->data_length);
			transport_send_check_condition_and_sense(&cmd->se_cmd,
					TCM_INCORRECT_AMOUNT_OF_DATA, 0);
			return DATAOUT_CANNOT_RECOVER;
		}
	} else {
		/* F-bit clear: the burst must not already be complete. */
		if (first_burst_len == conn->sess->sess_ops->FirstBurstLength) {
			pr_err("Command ITT: 0x%08x reached"
			" FirstBurstLength: %u, but ISCSI_FLAG_CMD_FINAL is not set. protocol"
				" error.\n", cmd->init_task_tag,
				conn->sess->sess_ops->FirstBurstLength);
			return DATAOUT_CANNOT_RECOVER;
		}
		if (first_burst_len == cmd->data_length) {
			pr_err("Command ITT: 0x%08x reached"
			" ExpXferLen: %u, but ISCSI_FLAG_CMD_FINAL is not set. protocol"
			" error.\n", cmd->init_task_tag, cmd->data_length);
			return DATAOUT_CANNOT_RECOVER;
		}
	}

out:
	return DATAOUT_NORMAL;
}
+
/*
 * Validate a solicited DataOUT PDU against the current sequence window
 * (DataSequenceInOrder=Yes) or its struct iscsi_seq (DataSequenceInOrder=No),
 * enforce MaxBurstLength, and sanity-check the F-bit against end-of-burst /
 * end-of-transfer.  Returns DATAOUT_NORMAL, DATAOUT_WITHIN_COMMAND_RECOVERY
 * (payload dumped), or DATAOUT_CANNOT_RECOVER.
 */
static int iscsit_dataout_check_sequence(
	struct iscsi_cmd *cmd,
	unsigned char *buf)
{
	u32 next_burst_len;
	struct iscsi_conn *conn = cmd->conn;
	struct iscsi_seq *seq = NULL;
	struct iscsi_data *hdr = (struct iscsi_data *) buf;
	u32 payload_length = ntoh24(hdr->dlength);

	/*
	 * For DataSequenceInOrder=Yes: Check that the offset and offset+length
	 * is within range as defined by iscsi_set_dataout_sequence_values().
	 *
	 * For DataSequenceInOrder=No: Check that an struct iscsi_seq exists for
	 * offset+length tuple.
	 */
	if (conn->sess->sess_ops->DataSequenceInOrder) {
		/*
		 * Due to possibility of recovery DataOUT sent by the initiator
		 * fullfilling an Recovery R2T, it's best to just dump the
		 * payload here, instead of erroring out.
		 */
		if ((hdr->offset < cmd->seq_start_offset) ||
		   ((hdr->offset + payload_length) > cmd->seq_end_offset)) {
			pr_err("Command ITT: 0x%08x with Offset: %u,"
			" Length: %u outside of Sequence %u:%u while"
			" DataSequenceInOrder=Yes.\n", cmd->init_task_tag,
			hdr->offset, payload_length, cmd->seq_start_offset,
				cmd->seq_end_offset);

			if (iscsit_dump_data_payload(conn, payload_length, 1) < 0)
				return DATAOUT_CANNOT_RECOVER;
			return DATAOUT_WITHIN_COMMAND_RECOVERY;
		}

		next_burst_len = (cmd->next_burst_len + payload_length);
	} else {
		seq = iscsit_get_seq_holder(cmd, hdr->offset, payload_length);
		if (!seq)
			return DATAOUT_CANNOT_RECOVER;
		/*
		 * Set the struct iscsi_seq pointer to reuse later.
		 */
		cmd->seq_ptr = seq;

		/* A completed sequence cannot accept more data: dump it. */
		if (seq->status == DATAOUT_SEQUENCE_COMPLETE) {
			if (iscsit_dump_data_payload(conn, payload_length, 1) < 0)
				return DATAOUT_CANNOT_RECOVER;
			return DATAOUT_WITHIN_COMMAND_RECOVERY;
		}

		next_burst_len = (seq->next_burst_len + payload_length);
	}

	if (next_burst_len > conn->sess->sess_ops->MaxBurstLength) {
		pr_err("Command ITT: 0x%08x, NextBurstLength: %u and"
			" Length: %u exceeds MaxBurstLength: %u. protocol"
			" error.\n", cmd->init_task_tag,
			(next_burst_len - payload_length),
			payload_length, conn->sess->sess_ops->MaxBurstLength);
		return DATAOUT_CANNOT_RECOVER;
	}

	/*
	 * Perform various MaxBurstLength and ISCSI_FLAG_CMD_FINAL sanity
	 * checks for the current DataOUT Sequence.
	 */
	if (hdr->flags & ISCSI_FLAG_CMD_FINAL) {
		/*
		 * Ignore ISCSI_FLAG_CMD_FINAL checks while DataPDUInOrder=No, end of
		 * sequence checks are handled in
		 * iscsit_dataout_datapduinorder_no_fbit().
		 */
		if (!conn->sess->sess_ops->DataPDUInOrder)
			goto out;

		/* F-bit set: this PDU must actually end the burst/transfer. */
		if (conn->sess->sess_ops->DataSequenceInOrder) {
			if ((next_burst_len <
			     conn->sess->sess_ops->MaxBurstLength) &&
			   ((cmd->write_data_done + payload_length) <
			     cmd->data_length)) {
				pr_err("Command ITT: 0x%08x set ISCSI_FLAG_CMD_FINAL"
				" before end of DataOUT sequence, protocol"
				" error.\n", cmd->init_task_tag);
				return DATAOUT_CANNOT_RECOVER;
			}
		} else {
			if (next_burst_len < seq->xfer_len) {
				pr_err("Command ITT: 0x%08x set ISCSI_FLAG_CMD_FINAL"
				" before end of DataOUT sequence, protocol"
				" error.\n", cmd->init_task_tag);
				return DATAOUT_CANNOT_RECOVER;
			}
		}
	} else {
		/* F-bit clear: the burst/transfer must not already be done. */
		if (conn->sess->sess_ops->DataSequenceInOrder) {
			if (next_burst_len ==
					conn->sess->sess_ops->MaxBurstLength) {
				pr_err("Command ITT: 0x%08x reached"
				" MaxBurstLength: %u, but ISCSI_FLAG_CMD_FINAL is"
				" not set, protocol error.", cmd->init_task_tag,
					conn->sess->sess_ops->MaxBurstLength);
				return DATAOUT_CANNOT_RECOVER;
			}
			if ((cmd->write_data_done + payload_length) ==
					cmd->data_length) {
				pr_err("Command ITT: 0x%08x reached"
				" last DataOUT PDU in sequence but ISCSI_FLAG_"
				"CMD_FINAL is not set, protocol error.\n",
					cmd->init_task_tag);
				return DATAOUT_CANNOT_RECOVER;
			}
		} else {
			if (next_burst_len == seq->xfer_len) {
				pr_err("Command ITT: 0x%08x reached"
				" last DataOUT PDU in sequence but ISCSI_FLAG_"
				"CMD_FINAL is not set, protocol error.\n",
					cmd->init_task_tag);
				return DATAOUT_CANNOT_RECOVER;
			}
		}
	}

out:
	return DATAOUT_NORMAL;
}
+
/*
 * Compare the PDU's DataSN against the expected counter (per-command for
 * DataSequenceInOrder=Yes, per-sequence otherwise).  A higher-than-expected
 * DataSN enters recovery (requires ERL>0); a lower one has its payload
 * dumped.  Returns DATAOUT_NORMAL, DATAOUT_WITHIN_COMMAND_RECOVERY or
 * DATAOUT_CANNOT_RECOVER.
 */
static int iscsit_dataout_check_datasn(
	struct iscsi_cmd *cmd,
	unsigned char *buf)
{
	int dump = 0, recovery = 0;
	u32 data_sn = 0;
	struct iscsi_conn *conn = cmd->conn;
	struct iscsi_data *hdr = (struct iscsi_data *) buf;
	u32 payload_length = ntoh24(hdr->dlength);

	/*
	 * Considering the target has no method of re-requesting DataOUT
	 * by DataSN, if we receive a greater DataSN than expected we
	 * assume the functions for DataPDUInOrder=[Yes,No] below will
	 * handle it.
	 *
	 * If the DataSN is less than expected, dump the payload.
	 */
	if (conn->sess->sess_ops->DataSequenceInOrder)
		data_sn = cmd->data_sn;
	else {
		struct iscsi_seq *seq = cmd->seq_ptr;
		data_sn = seq->data_sn;
	}

	if (hdr->datasn > data_sn) {
		pr_err("Command ITT: 0x%08x, received DataSN: 0x%08x"
			" higher than expected 0x%08x.\n", cmd->init_task_tag,
				hdr->datasn, data_sn);
		recovery = 1;
		goto recover;
	} else if (hdr->datasn < data_sn) {
		pr_err("Command ITT: 0x%08x, received DataSN: 0x%08x"
			" lower than expected 0x%08x, discarding payload.\n",
			cmd->init_task_tag, hdr->datasn, data_sn);
		dump = 1;
		goto dump;
	}

	return DATAOUT_NORMAL;

recover:
	/* Within-command recovery is only legal when ERL > 0. */
	if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
		pr_err("Unable to perform within-command recovery"
				" while ERL=0.\n");
		return DATAOUT_CANNOT_RECOVER;
	}
dump:
	if (iscsit_dump_data_payload(conn, payload_length, 1) < 0)
		return DATAOUT_CANNOT_RECOVER;

	return (recovery || dump) ? DATAOUT_WITHIN_COMMAND_RECOVERY :
				DATAOUT_NORMAL;
}
+
/*
 * DataPDUInOrder=Yes offset check run before the payload is received.
 * The PDU's offset must match the running offset counter exactly; a higher
 * offset triggers sequence recovery (ERL>0 required), a lower one dumps the
 * payload.  Returns DATAOUT_NORMAL, DATAOUT_WITHIN_COMMAND_RECOVERY, the
 * iscsit_recover_dataout_sequence() result, or DATAOUT_CANNOT_RECOVER.
 */
static int iscsit_dataout_pre_datapduinorder_yes(
	struct iscsi_cmd *cmd,
	unsigned char *buf)
{
	int dump = 0, recovery = 0;
	struct iscsi_conn *conn = cmd->conn;
	struct iscsi_data *hdr = (struct iscsi_data *) buf;
	u32 payload_length = ntoh24(hdr->dlength);

	/*
	 * For DataSequenceInOrder=Yes: If the offset is greater than the global
	 * DataPDUInOrder=Yes offset counter in struct iscsi_cmd a protocol error has
	 * occurred and fail the connection.
	 *
	 * For DataSequenceInOrder=No: If the offset is greater than the per
	 * sequence DataPDUInOrder=Yes offset counter in struct iscsi_seq a protocol
	 * error has occurred and fail the connection.
	 */
	if (conn->sess->sess_ops->DataSequenceInOrder) {
		if (hdr->offset != cmd->write_data_done) {
			pr_err("Command ITT: 0x%08x, received offset"
			" %u different than expected %u.\n", cmd->init_task_tag,
				hdr->offset, cmd->write_data_done);
			recovery = 1;
			goto recover;
		}
	} else {
		struct iscsi_seq *seq = cmd->seq_ptr;

		if (hdr->offset > seq->offset) {
			pr_err("Command ITT: 0x%08x, received offset"
			" %u greater than expected %u.\n", cmd->init_task_tag,
				hdr->offset, seq->offset);
			recovery = 1;
			goto recover;
		} else if (hdr->offset < seq->offset) {
			pr_err("Command ITT: 0x%08x, received offset"
			" %u less than expected %u, discarding payload.\n",
				cmd->init_task_tag, hdr->offset, seq->offset);
			dump = 1;
			goto dump;
		}
	}

	return DATAOUT_NORMAL;

recover:
	if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
		pr_err("Unable to perform within-command recovery"
				" while ERL=0.\n");
		return DATAOUT_CANNOT_RECOVER;
	}
dump:
	if (iscsit_dump_data_payload(conn, payload_length, 1) < 0)
		return DATAOUT_CANNOT_RECOVER;

	return (recovery) ? iscsit_recover_dataout_sequence(cmd,
		hdr->offset, payload_length) :
	       (dump) ? DATAOUT_WITHIN_COMMAND_RECOVERY : DATAOUT_NORMAL;
}
+
+static int iscsit_dataout_pre_datapduinorder_no(
+       struct iscsi_cmd *cmd,
+       unsigned char *buf)
+{
+       struct iscsi_pdu *pdu;
+       struct iscsi_data *hdr = (struct iscsi_data *) buf;
+       u32 payload_length = ntoh24(hdr->dlength);
+
+       pdu = iscsit_get_pdu_holder(cmd, hdr->offset, payload_length);
+       if (!pdu)
+               return DATAOUT_CANNOT_RECOVER;
+
+       cmd->pdu_ptr = pdu;
+
+       switch (pdu->status) {
+       case ISCSI_PDU_NOT_RECEIVED:
+       case ISCSI_PDU_CRC_FAILED:
+       case ISCSI_PDU_TIMED_OUT:
+               break;
+       case ISCSI_PDU_RECEIVED_OK:
+               pr_err("Command ITT: 0x%08x received already gotten"
+                       " Offset: %u, Length: %u\n", cmd->init_task_tag,
+                               hdr->offset, payload_length);
+               return iscsit_dump_data_payload(cmd->conn, payload_length, 1);
+       default:
+               return DATAOUT_CANNOT_RECOVER;
+       }
+
+       return DATAOUT_NORMAL;
+}
+
+static int iscsit_dataout_update_r2t(struct iscsi_cmd *cmd, u32 offset, u32 length)
+{
+       struct iscsi_r2t *r2t;
+
+       if (cmd->unsolicited_data)
+               return 0;
+
+       r2t = iscsit_get_r2t_for_eos(cmd, offset, length);
+       if (!r2t)
+               return -1;
+
+       spin_lock_bh(&cmd->r2t_lock);
+       r2t->seq_complete = 1;
+       cmd->outstanding_r2ts--;
+       spin_unlock_bh(&cmd->r2t_lock);
+
+       return 0;
+}
+
+static int iscsit_dataout_update_datapduinorder_no(
+       struct iscsi_cmd *cmd,
+       u32 data_sn,
+       int f_bit)
+{
+       int ret = 0;
+       struct iscsi_pdu *pdu = cmd->pdu_ptr;
+
+       pdu->data_sn = data_sn;
+
+       switch (pdu->status) {
+       case ISCSI_PDU_NOT_RECEIVED:
+               pdu->status = ISCSI_PDU_RECEIVED_OK;
+               break;
+       case ISCSI_PDU_CRC_FAILED:
+               pdu->status = ISCSI_PDU_RECEIVED_OK;
+               break;
+       case ISCSI_PDU_TIMED_OUT:
+               pdu->status = ISCSI_PDU_RECEIVED_OK;
+               break;
+       default:
+               return DATAOUT_CANNOT_RECOVER;
+       }
+
+       if (f_bit) {
+               ret = iscsit_dataout_datapduinorder_no_fbit(cmd, pdu);
+               if (ret == DATAOUT_CANNOT_RECOVER)
+                       return ret;
+       }
+
+       return DATAOUT_NORMAL;
+}
+
/*
 * Post-payload bookkeeping after the DataOUT payload's CRC passed: account
 * the bytes into the unsolicited first burst or the solicited sequence,
 * advance the relevant DataSN/offset counters, complete the covering R2T at
 * end of burst, and decide the caller's next step.  Returns
 * DATAOUT_SEND_TO_TRANSPORT when the full transfer is in,
 * DATAOUT_SEND_R2T at end of burst, DATAOUT_NORMAL otherwise, or
 * DATAOUT_CANNOT_RECOVER on error.
 */
static int iscsit_dataout_post_crc_passed(
	struct iscsi_cmd *cmd,
	unsigned char *buf)
{
	int ret, send_r2t = 0;
	struct iscsi_conn *conn = cmd->conn;
	struct iscsi_seq *seq = NULL;
	struct iscsi_data *hdr = (struct iscsi_data *) buf;
	u32 payload_length = ntoh24(hdr->dlength);

	if (cmd->unsolicited_data) {
		/* End of the unsolicited first burst: retire its R2T state. */
		if ((cmd->first_burst_len + payload_length) ==
		     conn->sess->sess_ops->FirstBurstLength) {
			if (iscsit_dataout_update_r2t(cmd, hdr->offset,
					payload_length) < 0)
				return DATAOUT_CANNOT_RECOVER;
			send_r2t = 1;
		}

		if (!conn->sess->sess_ops->DataPDUInOrder) {
			ret = iscsit_dataout_update_datapduinorder_no(cmd,
				hdr->datasn, (hdr->flags & ISCSI_FLAG_CMD_FINAL));
			if (ret == DATAOUT_CANNOT_RECOVER)
				return ret;
		}

		cmd->first_burst_len += payload_length;

		if (conn->sess->sess_ops->DataSequenceInOrder)
			cmd->data_sn++;
		else {
			seq = cmd->seq_ptr;
			seq->data_sn++;
			seq->offset += payload_length;
		}

		if (send_r2t) {
			if (seq)
				seq->status = DATAOUT_SEQUENCE_COMPLETE;
			cmd->first_burst_len = 0;
			/* Unsolicited phase is over; remaining data is solicited. */
			cmd->unsolicited_data = 0;
		}
	} else {
		if (conn->sess->sess_ops->DataSequenceInOrder) {
			/* End of a MaxBurstLength-sized solicited burst. */
			if ((cmd->next_burst_len + payload_length) ==
			     conn->sess->sess_ops->MaxBurstLength) {
				if (iscsit_dataout_update_r2t(cmd, hdr->offset,
						payload_length) < 0)
					return DATAOUT_CANNOT_RECOVER;
				send_r2t = 1;
			}

			if (!conn->sess->sess_ops->DataPDUInOrder) {
				ret = iscsit_dataout_update_datapduinorder_no(
						cmd, hdr->datasn,
						(hdr->flags & ISCSI_FLAG_CMD_FINAL));
				if (ret == DATAOUT_CANNOT_RECOVER)
					return ret;
			}

			cmd->next_burst_len += payload_length;
			cmd->data_sn++;

			if (send_r2t)
				cmd->next_burst_len = 0;
		} else {
			seq = cmd->seq_ptr;

			/* End of this sequence's transfer window. */
			if ((seq->next_burst_len + payload_length) ==
			     seq->xfer_len) {
				if (iscsit_dataout_update_r2t(cmd, hdr->offset,
						payload_length) < 0)
					return DATAOUT_CANNOT_RECOVER;
				send_r2t = 1;
			}

			if (!conn->sess->sess_ops->DataPDUInOrder) {
				ret = iscsit_dataout_update_datapduinorder_no(
						cmd, hdr->datasn,
						(hdr->flags & ISCSI_FLAG_CMD_FINAL));
				if (ret == DATAOUT_CANNOT_RECOVER)
					return ret;
			}

			seq->data_sn++;
			seq->offset += payload_length;
			seq->next_burst_len += payload_length;

			if (send_r2t) {
				seq->next_burst_len = 0;
				seq->status = DATAOUT_SEQUENCE_COMPLETE;
			}
		}
	}

	/* DataSN restarts per burst when sequences are in order. */
	if (send_r2t && conn->sess->sess_ops->DataSequenceInOrder)
		cmd->data_sn = 0;

	cmd->write_data_done += payload_length;

	return (cmd->write_data_done == cmd->data_length) ?
		DATAOUT_SEND_TO_TRANSPORT : (send_r2t) ?
		DATAOUT_SEND_R2T : DATAOUT_NORMAL;
}
+
+static int iscsit_dataout_post_crc_failed(
+       struct iscsi_cmd *cmd,
+       unsigned char *buf)
+{
+       struct iscsi_conn *conn = cmd->conn;
+       struct iscsi_pdu *pdu;
+       struct iscsi_data *hdr = (struct iscsi_data *) buf;
+       u32 payload_length = ntoh24(hdr->dlength);
+
+       if (conn->sess->sess_ops->DataPDUInOrder)
+               goto recover;
+       /*
+        * The rest of this function is only called when DataPDUInOrder=No.
+        */
+       pdu = cmd->pdu_ptr;
+
+       switch (pdu->status) {
+       case ISCSI_PDU_NOT_RECEIVED:
+               pdu->status = ISCSI_PDU_CRC_FAILED;
+               break;
+       case ISCSI_PDU_CRC_FAILED:
+               break;
+       case ISCSI_PDU_TIMED_OUT:
+               pdu->status = ISCSI_PDU_CRC_FAILED;
+               break;
+       default:
+               return DATAOUT_CANNOT_RECOVER;
+       }
+
+recover:
+       return iscsit_recover_dataout_sequence(cmd, hdr->offset, payload_length);
+}
+
+/*
+ *     Called from iscsit_handle_data_out() before DataOUT Payload is received
+ *     and CRC computed.
+ */
+extern int iscsit_check_pre_dataout(
+       struct iscsi_cmd *cmd,
+       unsigned char *buf)
+{
+       int ret;
+       struct iscsi_conn *conn = cmd->conn;
+
+       ret = iscsit_dataout_within_command_recovery_check(cmd, buf);
+       if ((ret == DATAOUT_WITHIN_COMMAND_RECOVERY) ||
+           (ret == DATAOUT_CANNOT_RECOVER))
+               return ret;
+
+       ret = iscsit_dataout_check_datasn(cmd, buf);
+       if ((ret == DATAOUT_WITHIN_COMMAND_RECOVERY) ||
+           (ret == DATAOUT_CANNOT_RECOVER))
+               return ret;
+
+       if (cmd->unsolicited_data) {
+               ret = iscsit_dataout_check_unsolicited_sequence(cmd, buf);
+               if ((ret == DATAOUT_WITHIN_COMMAND_RECOVERY) ||
+                   (ret == DATAOUT_CANNOT_RECOVER))
+                       return ret;
+       } else {
+               ret = iscsit_dataout_check_sequence(cmd, buf);
+               if ((ret == DATAOUT_WITHIN_COMMAND_RECOVERY) ||
+                   (ret == DATAOUT_CANNOT_RECOVER))
+                       return ret;
+       }
+
+       return (conn->sess->sess_ops->DataPDUInOrder) ?
+               iscsit_dataout_pre_datapduinorder_yes(cmd, buf) :
+               iscsit_dataout_pre_datapduinorder_no(cmd, buf);
+}
+
+/*
+ *     Called from iscsit_handle_data_out() after DataOUT Payload is received
+ *     and CRC computed.
+ */
+int iscsit_check_post_dataout(
+       struct iscsi_cmd *cmd,
+       unsigned char *buf,
+       u8 data_crc_failed)
+{
+       struct iscsi_conn *conn = cmd->conn;
+
+       cmd->dataout_timeout_retries = 0;
+
+       if (!data_crc_failed)
+               return iscsit_dataout_post_crc_passed(cmd, buf);
+       else {
+               if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+                       pr_err("Unable to recover from DataOUT CRC"
+                               " failure while ERL=0, closing session.\n");
+                       iscsit_add_reject_from_cmd(ISCSI_REASON_DATA_DIGEST_ERROR,
+                                       1, 0, buf, cmd);
+                       return DATAOUT_CANNOT_RECOVER;
+               }
+
+               iscsit_add_reject_from_cmd(ISCSI_REASON_DATA_DIGEST_ERROR,
+                               0, 0, buf, cmd);
+               return iscsit_dataout_post_crc_failed(cmd, buf);
+       }
+}
+
+static void iscsit_handle_time2retain_timeout(unsigned long data)
+{
+       struct iscsi_session *sess = (struct iscsi_session *) data;
+       struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess);
+       struct se_portal_group *se_tpg = &tpg->tpg_se_tpg;
+
+       spin_lock_bh(&se_tpg->session_lock);
+       if (sess->time2retain_timer_flags & ISCSI_TF_STOP) {
+               spin_unlock_bh(&se_tpg->session_lock);
+               return;
+       }
+       if (atomic_read(&sess->session_reinstatement)) {
+               pr_err("Exiting Time2Retain handler because"
+                               " session_reinstatement=1\n");
+               spin_unlock_bh(&se_tpg->session_lock);
+               return;
+       }
+       sess->time2retain_timer_flags |= ISCSI_TF_EXPIRED;
+
+       pr_err("Time2Retain timer expired for SID: %u, cleaning up"
+                       " iSCSI session.\n", sess->sid);
+       {
+       struct iscsi_tiqn *tiqn = tpg->tpg_tiqn;
+
+       if (tiqn) {
+               spin_lock(&tiqn->sess_err_stats.lock);
+               strcpy(tiqn->sess_err_stats.last_sess_fail_rem_name,
+                       (void *)sess->sess_ops->InitiatorName);
+               tiqn->sess_err_stats.last_sess_failure_type =
+                               ISCSI_SESS_ERR_CXN_TIMEOUT;
+               tiqn->sess_err_stats.cxn_timeout_errors++;
+               sess->conn_timeout_errors++;
+               spin_unlock(&tiqn->sess_err_stats.lock);
+       }
+       }
+
+       spin_unlock_bh(&se_tpg->session_lock);
+       iscsit_close_session(sess);
+}
+
+extern void iscsit_start_time2retain_handler(struct iscsi_session *sess)
+{
+       int tpg_active;
+       /*
+        * Only start Time2Retain timer when the associated TPG is still in
+        * an ACTIVE (eg: not disabled or shutdown) state.
+        */
+       spin_lock(&ISCSI_TPG_S(sess)->tpg_state_lock);
+       tpg_active = (ISCSI_TPG_S(sess)->tpg_state == TPG_STATE_ACTIVE);
+       spin_unlock(&ISCSI_TPG_S(sess)->tpg_state_lock);
+
+       if (!tpg_active)
+               return;
+
+       if (sess->time2retain_timer_flags & ISCSI_TF_RUNNING)
+               return;
+
+       pr_debug("Starting Time2Retain timer for %u seconds on"
+               " SID: %u\n", sess->sess_ops->DefaultTime2Retain, sess->sid);
+
+       init_timer(&sess->time2retain_timer);
+       sess->time2retain_timer.expires =
+               (get_jiffies_64() + sess->sess_ops->DefaultTime2Retain * HZ);
+       sess->time2retain_timer.data = (unsigned long)sess;
+       sess->time2retain_timer.function = iscsit_handle_time2retain_timeout;
+       sess->time2retain_timer_flags &= ~ISCSI_TF_STOP;
+       sess->time2retain_timer_flags |= ISCSI_TF_RUNNING;
+       add_timer(&sess->time2retain_timer);
+}
+
+/*
+ *     Called with spin_lock_bh(&struct se_portal_group->session_lock) held
+ */
+extern int iscsit_stop_time2retain_timer(struct iscsi_session *sess)
+{
+       struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess);
+       struct se_portal_group *se_tpg = &tpg->tpg_se_tpg;
+
+       if (sess->time2retain_timer_flags & ISCSI_TF_EXPIRED)
+               return -1;
+
+       if (!(sess->time2retain_timer_flags & ISCSI_TF_RUNNING))
+               return 0;
+
+       sess->time2retain_timer_flags |= ISCSI_TF_STOP;
+       spin_unlock_bh(&se_tpg->session_lock);
+
+       del_timer_sync(&sess->time2retain_timer);
+
+       spin_lock_bh(&se_tpg->session_lock);
+       sess->time2retain_timer_flags &= ~ISCSI_TF_RUNNING;
+       pr_debug("Stopped Time2Retain Timer for SID: %u\n",
+                       sess->sid);
+       return 0;
+}
+
+void iscsit_connection_reinstatement_rcfr(struct iscsi_conn *conn)
+{
+       spin_lock_bh(&conn->state_lock);
+       if (atomic_read(&conn->connection_exit)) {
+               spin_unlock_bh(&conn->state_lock);
+               goto sleep;
+       }
+
+       if (atomic_read(&conn->transport_failed)) {
+               spin_unlock_bh(&conn->state_lock);
+               goto sleep;
+       }
+       spin_unlock_bh(&conn->state_lock);
+
+       iscsi_thread_set_force_reinstatement(conn);
+
+sleep:
+       wait_for_completion(&conn->conn_wait_rcfr_comp);
+       complete(&conn->conn_post_wait_comp);
+}
+
+void iscsit_cause_connection_reinstatement(struct iscsi_conn *conn, int sleep)
+{
+       spin_lock_bh(&conn->state_lock);
+       if (atomic_read(&conn->connection_exit)) {
+               spin_unlock_bh(&conn->state_lock);
+               return;
+       }
+
+       if (atomic_read(&conn->transport_failed)) {
+               spin_unlock_bh(&conn->state_lock);
+               return;
+       }
+
+       if (atomic_read(&conn->connection_reinstatement)) {
+               spin_unlock_bh(&conn->state_lock);
+               return;
+       }
+
+       if (iscsi_thread_set_force_reinstatement(conn) < 0) {
+               spin_unlock_bh(&conn->state_lock);
+               return;
+       }
+
+       atomic_set(&conn->connection_reinstatement, 1);
+       if (!sleep) {
+               spin_unlock_bh(&conn->state_lock);
+               return;
+       }
+
+       atomic_set(&conn->sleep_on_conn_wait_comp, 1);
+       spin_unlock_bh(&conn->state_lock);
+
+       wait_for_completion(&conn->conn_wait_comp);
+       complete(&conn->conn_post_wait_comp);
+}
+
+void iscsit_fall_back_to_erl0(struct iscsi_session *sess)
+{
+       pr_debug("Falling back to ErrorRecoveryLevel=0 for SID:"
+                       " %u\n", sess->sid);
+
+       atomic_set(&sess->session_fall_back_to_erl0, 1);
+}
+
+static void iscsit_handle_connection_cleanup(struct iscsi_conn *conn)
+{
+       struct iscsi_session *sess = conn->sess;
+
+       if ((sess->sess_ops->ErrorRecoveryLevel == 2) &&
+           !atomic_read(&sess->session_reinstatement) &&
+           !atomic_read(&sess->session_fall_back_to_erl0))
+               iscsit_connection_recovery_transport_reset(conn);
+       else {
+               pr_debug("Performing cleanup for failed iSCSI"
+                       " Connection ID: %hu from %s\n", conn->cid,
+                       sess->sess_ops->InitiatorName);
+               iscsit_close_connection(conn);
+       }
+}
+
+extern void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn)
+{
+       spin_lock_bh(&conn->state_lock);
+       if (atomic_read(&conn->connection_exit)) {
+               spin_unlock_bh(&conn->state_lock);
+               return;
+       }
+       atomic_set(&conn->connection_exit, 1);
+
+       if (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT) {
+               spin_unlock_bh(&conn->state_lock);
+               iscsit_close_connection(conn);
+               return;
+       }
+
+       if (conn->conn_state == TARG_CONN_STATE_CLEANUP_WAIT) {
+               spin_unlock_bh(&conn->state_lock);
+               return;
+       }
+
+       pr_debug("Moving to TARG_CONN_STATE_CLEANUP_WAIT.\n");
+       conn->conn_state = TARG_CONN_STATE_CLEANUP_WAIT;
+       spin_unlock_bh(&conn->state_lock);
+
+       iscsit_handle_connection_cleanup(conn);
+}
+
+/*
+ *     This is the simple function that makes the magic of
+ *     sync and steering happen in the follow paradoxical order:
+ *
+ *     0) Receive conn->of_marker (bytes left until next OFMarker)
+ *        bytes into an offload buffer.  When we pass the exact number
+ *        of bytes in conn->of_marker, iscsit_dump_data_payload() and hence
+ *        rx_data() will automatically receive the identical u32 marker
+ *        values and store it in conn->of_marker_offset;
+ *     1) Now conn->of_marker_offset will contain the offset to the start
+ *        of the next iSCSI PDU.  Dump these remaining bytes into another
+ *        offload buffer.
+ *     2) We are done!
+ *        Next byte in the TCP stream will contain the next iSCSI PDU!
+ *        Cool Huh?!
+ */
+int iscsit_recover_from_unknown_opcode(struct iscsi_conn *conn)
+{
+       /*
+        * Make sure the remaining bytes to the next marker is a sane value.
+        */
+       if (conn->of_marker > (conn->conn_ops->OFMarkInt * 4)) {
+               pr_err("Remaining bytes to OFMarker: %u exceeds"
+                       " OFMarkInt bytes: %u.\n", conn->of_marker,
+                               conn->conn_ops->OFMarkInt * 4);
+               return -1;
+       }
+
+       pr_debug("Advancing %u bytes in TCP stream to get to the"
+                       " next OFMarker.\n", conn->of_marker);
+
+       if (iscsit_dump_data_payload(conn, conn->of_marker, 0) < 0)
+               return -1;
+
+       /*
+        * Make sure the offset marker we retrieved is a valid value.
+        */
+       if (conn->of_marker_offset > (ISCSI_HDR_LEN + (ISCSI_CRC_LEN * 2) +
+           conn->conn_ops->MaxRecvDataSegmentLength)) {
+               pr_err("OfMarker offset value: %u exceeds limit.\n",
+                       conn->of_marker_offset);
+               return -1;
+       }
+
+       pr_debug("Discarding %u bytes of TCP stream to get to the"
+                       " next iSCSI Opcode.\n", conn->of_marker_offset);
+
+       if (iscsit_dump_data_payload(conn, conn->of_marker_offset, 0) < 0)
+               return -1;
+
+       return 0;
+}
diff --git a/drivers/target/iscsi/iscsi_target_erl0.h b/drivers/target/iscsi/iscsi_target_erl0.h
new file mode 100644 (file)
index 0000000..21acc9a
--- /dev/null
@@ -0,0 +1,15 @@
+#ifndef ISCSI_TARGET_ERL0_H
+#define ISCSI_TARGET_ERL0_H
+
+extern void iscsit_set_dataout_sequence_values(struct iscsi_cmd *);
+extern int iscsit_check_pre_dataout(struct iscsi_cmd *, unsigned char *);
+extern int iscsit_check_post_dataout(struct iscsi_cmd *, unsigned char *, u8);
+extern void iscsit_start_time2retain_handler(struct iscsi_session *);
+extern int iscsit_stop_time2retain_timer(struct iscsi_session *);
+extern void iscsit_connection_reinstatement_rcfr(struct iscsi_conn *);
+extern void iscsit_cause_connection_reinstatement(struct iscsi_conn *, int);
+extern void iscsit_fall_back_to_erl0(struct iscsi_session *);
+extern void iscsit_take_action_for_connection_exit(struct iscsi_conn *);
+extern int iscsit_recover_from_unknown_opcode(struct iscsi_conn *);
+
+#endif   /*** ISCSI_TARGET_ERL0_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_erl1.c b/drivers/target/iscsi/iscsi_target_erl1.c
new file mode 100644 (file)
index 0000000..9806507
--- /dev/null
@@ -0,0 +1,1299 @@
+/*******************************************************************************
+ * This file contains error recovery level one used by the iSCSI Target driver.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/list.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_seq_pdu_list.h"
+#include "iscsi_target_datain_values.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_erl2.h"
+#include "iscsi_target.h"
+
+#define OFFLOAD_BUF_SIZE       32768
+
+/*
+ *     Used to dump excess datain payload for certain error recovery
+ *     situations.  Receive in OFFLOAD_BUF_SIZE max of datain per rx_data().
+ *
+ *     dump_padding_digest denotes if padding and data digests need
+ *     to be dumped.
+ */
+int iscsit_dump_data_payload(
+       struct iscsi_conn *conn,
+       u32 buf_len,
+       int dump_padding_digest)
+{
+       char *buf, pad_bytes[4];
+       int ret = DATAOUT_WITHIN_COMMAND_RECOVERY, rx_got;
+       u32 length, padding, offset = 0, size;
+       struct kvec iov;
+
+       length = (buf_len > OFFLOAD_BUF_SIZE) ? OFFLOAD_BUF_SIZE : buf_len;
+
+       buf = kzalloc(length, GFP_ATOMIC);
+       if (!buf) {
+               pr_err("Unable to allocate %u bytes for offload"
+                               " buffer.\n", length);
+               return -1;
+       }
+       memset(&iov, 0, sizeof(struct kvec));
+
+       while (offset < buf_len) {
+               size = ((offset + length) > buf_len) ?
+                       (buf_len - offset) : length;
+
+               iov.iov_len = size;
+               iov.iov_base = buf;
+
+               rx_got = rx_data(conn, &iov, 1, size);
+               if (rx_got != size) {
+                       ret = DATAOUT_CANNOT_RECOVER;
+                       goto out;
+               }
+
+               offset += size;
+       }
+
+       if (!dump_padding_digest)
+               goto out;
+
+       padding = ((-buf_len) & 3);
+       if (padding != 0) {
+               iov.iov_len = padding;
+               iov.iov_base = pad_bytes;
+
+               rx_got = rx_data(conn, &iov, 1, padding);
+               if (rx_got != padding) {
+                       ret = DATAOUT_CANNOT_RECOVER;
+                       goto out;
+               }
+       }
+
+       if (conn->conn_ops->DataDigest) {
+               u32 data_crc;
+
+               iov.iov_len = ISCSI_CRC_LEN;
+               iov.iov_base = &data_crc;
+
+               rx_got = rx_data(conn, &iov, 1, ISCSI_CRC_LEN);
+               if (rx_got != ISCSI_CRC_LEN) {
+                       ret = DATAOUT_CANNOT_RECOVER;
+                       goto out;
+               }
+       }
+
+out:
+       kfree(buf);
+       return ret;
+}
+
+/*
+ *     Used for retransmitting R2Ts from a R2T SNACK request.
+ */
+static int iscsit_send_recovery_r2t_for_snack(
+       struct iscsi_cmd *cmd,
+       struct iscsi_r2t *r2t)
+{
+       /*
+        * If the struct iscsi_r2t has not been sent yet, we can safely
+        * ignore retransmission
+        * of the R2TSN in question.
+        */
+       spin_lock_bh(&cmd->r2t_lock);
+       if (!r2t->sent_r2t) {
+               spin_unlock_bh(&cmd->r2t_lock);
+               return 0;
+       }
+       r2t->sent_r2t = 0;
+       spin_unlock_bh(&cmd->r2t_lock);
+
+       iscsit_add_cmd_to_immediate_queue(cmd, cmd->conn, ISTATE_SEND_R2T);
+
+       return 0;
+}
+
+static int iscsit_handle_r2t_snack(
+       struct iscsi_cmd *cmd,
+       unsigned char *buf,
+       u32 begrun,
+       u32 runlength)
+{
+       u32 last_r2tsn;
+       struct iscsi_r2t *r2t;
+
+       /*
+        * Make sure the initiator is not requesting retransmission
+        * of R2TSNs already acknowledged by a TMR TASK_REASSIGN.
+        */
+       if ((cmd->cmd_flags & ICF_GOT_DATACK_SNACK) &&
+           (begrun <= cmd->acked_data_sn)) {
+               pr_err("ITT: 0x%08x, R2T SNACK requesting"
+                       " retransmission of R2TSN: 0x%08x to 0x%08x but already"
+                       " acked to  R2TSN: 0x%08x by TMR TASK_REASSIGN,"
+                       " protocol error.\n", cmd->init_task_tag, begrun,
+                       (begrun + runlength), cmd->acked_data_sn);
+
+                       return iscsit_add_reject_from_cmd(
+                                       ISCSI_REASON_PROTOCOL_ERROR,
+                                       1, 0, buf, cmd);
+       }
+
+       if (runlength) {
+               if ((begrun + runlength) > cmd->r2t_sn) {
+                       pr_err("Command ITT: 0x%08x received R2T SNACK"
+                       " with BegRun: 0x%08x, RunLength: 0x%08x, exceeds"
+                       " current R2TSN: 0x%08x, protocol error.\n",
+                       cmd->init_task_tag, begrun, runlength, cmd->r2t_sn);
+                       return iscsit_add_reject_from_cmd(
+                               ISCSI_REASON_BOOKMARK_INVALID, 1, 0, buf, cmd);
+               }
+               last_r2tsn = (begrun + runlength);
+       } else
+               last_r2tsn = cmd->r2t_sn;
+
+       while (begrun < last_r2tsn) {
+               r2t = iscsit_get_holder_for_r2tsn(cmd, begrun);
+               if (!r2t)
+                       return -1;
+               if (iscsit_send_recovery_r2t_for_snack(cmd, r2t) < 0)
+                       return -1;
+
+               begrun++;
+       }
+
+       return 0;
+}
+
+/*
+ *     Generates Offsets and NextBurstLength based on Begrun and Runlength
+ *     carried in a Data SNACK or ExpDataSN in TMR TASK_REASSIGN.
+ *
+ *     For DataSequenceInOrder=Yes and DataPDUInOrder=[Yes,No] only.
+ *
+ *     FIXME: How is this handled for a RData SNACK?
+ */
+int iscsit_create_recovery_datain_values_datasequenceinorder_yes(
+       struct iscsi_cmd *cmd,
+       struct iscsi_datain_req *dr)
+{
+       u32 data_sn = 0, data_sn_count = 0;
+       u32 pdu_start = 0, seq_no = 0;
+       u32 begrun = dr->begrun;
+       struct iscsi_conn *conn = cmd->conn;
+
+       while (begrun > data_sn++) {
+               data_sn_count++;
+               if ((dr->next_burst_len +
+                    conn->conn_ops->MaxRecvDataSegmentLength) <
+                    conn->sess->sess_ops->MaxBurstLength) {
+                       dr->read_data_done +=
+                               conn->conn_ops->MaxRecvDataSegmentLength;
+                       dr->next_burst_len +=
+                               conn->conn_ops->MaxRecvDataSegmentLength;
+               } else {
+                       dr->read_data_done +=
+                               (conn->sess->sess_ops->MaxBurstLength -
+                                dr->next_burst_len);
+                       dr->next_burst_len = 0;
+                       pdu_start += data_sn_count;
+                       data_sn_count = 0;
+                       seq_no++;
+               }
+       }
+
+       if (!conn->sess->sess_ops->DataPDUInOrder) {
+               cmd->seq_no = seq_no;
+               cmd->pdu_start = pdu_start;
+               cmd->pdu_send_order = data_sn_count;
+       }
+
+       return 0;
+}
+
+/*
+ *     Generates Offsets and NextBurstLength based on Begrun and Runlength
+ *     carried in a Data SNACK or ExpDataSN in TMR TASK_REASSIGN.
+ *
+ *     For DataSequenceInOrder=No and DataPDUInOrder=[Yes,No] only.
+ *
+ *     FIXME: How is this handled for a RData SNACK?
+ */
+int iscsit_create_recovery_datain_values_datasequenceinorder_no(
+       struct iscsi_cmd *cmd,
+       struct iscsi_datain_req *dr)
+{
+       int found_seq = 0, i;
+       u32 data_sn, read_data_done = 0, seq_send_order = 0;
+       u32 begrun = dr->begrun;
+       u32 runlength = dr->runlength;
+       struct iscsi_conn *conn = cmd->conn;
+       struct iscsi_seq *first_seq = NULL, *seq = NULL;
+
+       if (!cmd->seq_list) {
+               pr_err("struct iscsi_cmd->seq_list is NULL!\n");
+               return -1;
+       }
+
+       /*
+        * Calculate read_data_done for all sequences containing a
+        * first_datasn and last_datasn less than the BegRun.
+        *
+        * Locate the struct iscsi_seq the BegRun lies within and calculate
+        * NextBurstLength up to the DataSN based on MaxRecvDataSegmentLength.
+        *
+        * Also use struct iscsi_seq->seq_send_order to determine where to start.
+        */
+       for (i = 0; i < cmd->seq_count; i++) {
+               seq = &cmd->seq_list[i];
+
+               if (!seq->seq_send_order)
+                       first_seq = seq;
+
+               /*
+                * No data has been transferred for this DataIN sequence, so the
+                * seq->first_datasn and seq->last_datasn have not been set.
+                */
+               if (!seq->sent) {
+#if 0
+                       pr_err("Ignoring non-sent sequence 0x%08x ->"
+                               " 0x%08x\n\n", seq->first_datasn,
+                               seq->last_datasn);
+#endif
+                       continue;
+               }
+
+               /*
+                * This DataIN sequence precedes the received BegRun, add the
+                * total xfer_len of the sequence to read_data_done and reset
+                * seq->pdu_send_order.
+                */
+               if ((seq->first_datasn < begrun) &&
+                               (seq->last_datasn < begrun)) {
+#if 0
+                       pr_err("Pre BegRun sequence 0x%08x ->"
+                               " 0x%08x\n", seq->first_datasn,
+                               seq->last_datasn);
+#endif
+                       read_data_done += cmd->seq_list[i].xfer_len;
+                       seq->next_burst_len = seq->pdu_send_order = 0;
+                       continue;
+               }
+
+               /*
+                * The BegRun lies within this DataIN sequence.
+                */
+               if ((seq->first_datasn <= begrun) &&
+                               (seq->last_datasn >= begrun)) {
+#if 0
+                       pr_err("Found sequence begrun: 0x%08x in"
+                               " 0x%08x -> 0x%08x\n", begrun,
+                               seq->first_datasn, seq->last_datasn);
+#endif
+                       seq_send_order = seq->seq_send_order;
+                       data_sn = seq->first_datasn;
+                       seq->next_burst_len = seq->pdu_send_order = 0;
+                       found_seq = 1;
+
+                       /*
+                        * For DataPDUInOrder=Yes, while the first DataSN of
+                        * the sequence is less than the received BegRun, add
+                        * the MaxRecvDataSegmentLength to read_data_done and
+                        * to the sequence's next_burst_len;
+                        *
+                        * For DataPDUInOrder=No, while the first DataSN of the
+                        * sequence is less than the received BegRun, find the
+                        * struct iscsi_pdu of the DataSN in question and add the
+                        * MaxRecvDataSegmentLength to read_data_done and to the
+                        * sequence's next_burst_len;
+                        */
+                       if (conn->sess->sess_ops->DataPDUInOrder) {
+                               while (data_sn < begrun) {
+                                       seq->pdu_send_order++;
+                                       read_data_done +=
+                                               conn->conn_ops->MaxRecvDataSegmentLength;
+                                       seq->next_burst_len +=
+                                               conn->conn_ops->MaxRecvDataSegmentLength;
+                                       data_sn++;
+                               }
+                       } else {
+                               int j;
+                               struct iscsi_pdu *pdu;
+
+                               while (data_sn < begrun) {
+                                       seq->pdu_send_order++;
+
+                                       for (j = 0; j < seq->pdu_count; j++) {
+                                               pdu = &cmd->pdu_list[
+                                                       seq->pdu_start + j];
+                                               if (pdu->data_sn == data_sn) {
+                                                       read_data_done +=
+                                                               pdu->length;
+                                                       seq->next_burst_len +=
+                                                               pdu->length;
+                                               }
+                                       }
+                                       data_sn++;
+                               }
+                       }
+                       continue;
+               }
+
+               /*
+                * This DataIN sequence is larger than the received BegRun,
+                * reset seq->pdu_send_order and continue.
+                */
+               if ((seq->first_datasn > begrun) ||
+                               (seq->last_datasn > begrun)) {
+#if 0
+                       pr_err("Post BegRun sequence 0x%08x -> 0x%08x\n",
+                                       seq->first_datasn, seq->last_datasn);
+#endif
+                       seq->next_burst_len = seq->pdu_send_order = 0;
+                       continue;
+               }
+       }
+
+       if (!found_seq) {
+               if (!begrun) {
+                       if (!first_seq) {
+                               pr_err("ITT: 0x%08x, Begrun: 0x%08x"
+                                       " but first_seq is NULL\n",
+                                       cmd->init_task_tag, begrun);
+                               return -1;
+                       }
+                       seq_send_order = first_seq->seq_send_order;
+                       seq->next_burst_len = seq->pdu_send_order = 0;
+                       goto done;
+               }
+
+               pr_err("Unable to locate struct iscsi_seq for ITT: 0x%08x,"
+                       " BegRun: 0x%08x, RunLength: 0x%08x while"
+                       " DataSequenceInOrder=No and DataPDUInOrder=%s.\n",
+                               cmd->init_task_tag, begrun, runlength,
+                       (conn->sess->sess_ops->DataPDUInOrder) ? "Yes" : "No");
+               return -1;
+       }
+
+done:
+       dr->read_data_done = read_data_done;
+       dr->seq_send_order = seq_send_order;
+
+       return 0;
+}
+
+static int iscsit_handle_recovery_datain(
+       struct iscsi_cmd *cmd,
+       unsigned char *buf,
+       u32 begrun,
+       u32 runlength)
+{
+       struct iscsi_conn *conn = cmd->conn;
+       struct iscsi_datain_req *dr;
+       struct se_cmd *se_cmd = &cmd->se_cmd;
+
+       if (!atomic_read(&se_cmd->t_transport_complete)) {
+               pr_err("Ignoring ITT: 0x%08x Data SNACK\n",
+                               cmd->init_task_tag);
+               return 0;
+       }
+
+       /*
+        * Make sure the initiator is not requesting retransmission
+        * of DataSNs already acknowledged by a Data ACK SNACK.
+        */
+       if ((cmd->cmd_flags & ICF_GOT_DATACK_SNACK) &&
+           (begrun <= cmd->acked_data_sn)) {
+               pr_err("ITT: 0x%08x, Data SNACK requesting"
+                       " retransmission of DataSN: 0x%08x to 0x%08x but"
+                       " already acked to DataSN: 0x%08x by Data ACK SNACK,"
+                       " protocol error.\n", cmd->init_task_tag, begrun,
+                       (begrun + runlength), cmd->acked_data_sn);
+
+               return iscsit_add_reject_from_cmd(ISCSI_REASON_PROTOCOL_ERROR,
+                               1, 0, buf, cmd);
+       }
+
+       /*
+        * Make sure BegRun and RunLength in the Data SNACK are sane.
+        * Note: (cmd->data_sn - 1) will carry the maximum DataSN sent.
+        */
+       if ((begrun + runlength) > (cmd->data_sn - 1)) {
+               pr_err("Initiator requesting BegRun: 0x%08x, RunLength"
+                       ": 0x%08x greater than maximum DataSN: 0x%08x.\n",
+                               begrun, runlength, (cmd->data_sn - 1));
+               return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID,
+                               1, 0, buf, cmd);
+       }
+
+       dr = iscsit_allocate_datain_req();
+       if (!dr)
+               return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+                               1, 0, buf, cmd);
+
+       dr->data_sn = dr->begrun = begrun;
+       dr->runlength = runlength;
+       dr->generate_recovery_values = 1;
+       dr->recovery = DATAIN_WITHIN_COMMAND_RECOVERY;
+
+       iscsit_attach_datain_req(cmd, dr);
+
+       cmd->i_state = ISTATE_SEND_DATAIN;
+       iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+
+       return 0;
+}
+
+int iscsit_handle_recovery_datain_or_r2t(
+       struct iscsi_conn *conn,
+       unsigned char *buf,
+       u32 init_task_tag,
+       u32 targ_xfer_tag,
+       u32 begrun,
+       u32 runlength)
+{
+       struct iscsi_cmd *cmd;
+
+       cmd = iscsit_find_cmd_from_itt(conn, init_task_tag);
+       if (!cmd)
+               return 0;
+
+       /*
+        * FIXME: This will not work for bidi commands.
+        */
+       switch (cmd->data_direction) {
+       case DMA_TO_DEVICE:
+               return iscsit_handle_r2t_snack(cmd, buf, begrun, runlength);
+       case DMA_FROM_DEVICE:
+               return iscsit_handle_recovery_datain(cmd, buf, begrun,
+                               runlength);
+       default:
+               pr_err("Unknown cmd->data_direction: 0x%02x\n",
+                               cmd->data_direction);
+               return -1;
+       }
+
+       return 0;
+}
+
+/* #warning FIXME: Status SNACK needs to be dependent on OPCODE!!! */
+/*
+ * Handle a Status SNACK: retransmit status for every StatSN in
+ * [begrun, begrun + runlength), or up to the connection's current
+ * StatSN when RunLength is zero.  StatSNs for which no command (or no
+ * final status yet) exists are assumed to be proactive SNACKs for
+ * untransmitted status and are skipped.  Always returns 0.
+ */
+int iscsit_handle_status_snack(
+       struct iscsi_conn *conn,
+       u32 init_task_tag,
+       u32 targ_xfer_tag,
+       u32 begrun,
+       u32 runlength)
+{
+       struct iscsi_cmd *cmd = NULL;
+       u32 last_statsn;
+       int found_cmd;
+
+       /* A BegRun below ExpStatSN was already acknowledged; ignore it. */
+       if (conn->exp_statsn > begrun) {
+               pr_err("Got Status SNACK Begrun: 0x%08x, RunLength:"
+                       " 0x%08x but already got ExpStatSN: 0x%08x on CID:"
+                       " %hu.\n", begrun, runlength, conn->exp_statsn,
+                       conn->cid);
+               return 0;
+       }
+
+       /* RunLength == 0 means "everything up to the current StatSN". */
+       last_statsn = (!runlength) ? conn->stat_sn : (begrun + runlength);
+
+       while (begrun < last_statsn) {
+               found_cmd = 0;
+
+               /* Locate the command carrying this StatSN, under cmd_lock. */
+               spin_lock_bh(&conn->cmd_lock);
+               list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) {
+                       if (cmd->stat_sn == begrun) {
+                               found_cmd = 1;
+                               break;
+                       }
+               }
+               spin_unlock_bh(&conn->cmd_lock);
+
+               if (!found_cmd) {
+                       pr_err("Unable to find StatSN: 0x%08x for"
+                               " a Status SNACK, assuming this was a"
+                               " protactic SNACK for an untransmitted"
+                               " StatSN, ignoring.\n", begrun);
+                       begrun++;
+                       continue;
+               }
+
+               /* A command still sending DataIN has no final status yet. */
+               spin_lock_bh(&cmd->istate_lock);
+               if (cmd->i_state == ISTATE_SEND_DATAIN) {
+                       spin_unlock_bh(&cmd->istate_lock);
+                       pr_err("Ignoring Status SNACK for BegRun:"
+                               " 0x%08x, RunLength: 0x%08x, assuming this was"
+                               " a protactic SNACK for an untransmitted"
+                               " StatSN\n", begrun, runlength);
+                       begrun++;
+                       continue;
+               }
+               spin_unlock_bh(&cmd->istate_lock);
+
+               /* Queue the command for status retransmission. */
+               cmd->i_state = ISTATE_SEND_STATUS_RECOVERY;
+               iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+               begrun++;
+       }
+
+       return 0;
+}
+
+/*
+ * Process a Data ACK SNACK: advance the command's acked-DataSN window.
+ * Returns 0 on success, -1 for an invalid TTT or a stale BegRun.
+ */
+int iscsit_handle_data_ack(
+       struct iscsi_conn *conn,
+       u32 targ_xfer_tag,
+       u32 begrun,
+       u32 runlength)
+{
+       struct iscsi_cmd *cmd = iscsit_find_cmd_from_ttt(conn, targ_xfer_tag);
+
+       if (!cmd) {
+               pr_err("Data ACK SNACK for TTT: 0x%08x is"
+                       " invalid.\n", targ_xfer_tag);
+               return -1;
+       }
+
+       if (begrun <= cmd->acked_data_sn) {
+               pr_err("ITT: 0x%08x Data ACK SNACK BegRUN: 0x%08x is"
+                       " less than the already acked DataSN: 0x%08x.\n",
+                       cmd->init_task_tag, begrun, cmd->acked_data_sn);
+               return -1;
+       }
+
+       /*
+        * Per iSCSI v19 10.16.6 the BegRun of a Data ACK SNACK names the
+        * next expected DataSN, so every DataSN below it is now acked.
+        */
+       cmd->acked_data_sn = (begrun - 1);
+       cmd->cmd_flags |= ICF_GOT_DATACK_SNACK;
+
+       pr_debug("Received Data ACK SNACK for ITT: 0x%08x,"
+               " updated acked DataSN to 0x%08x.\n",
+                       cmd->init_task_tag, cmd->acked_data_sn);
+
+       return 0;
+}
+
+/*
+ * Queue a single recovery R2T covering [offset, offset + xfer_len)
+ * while holding cmd->r2t_lock.
+ */
+static int iscsit_send_recovery_r2t(
+       struct iscsi_cmd *cmd,
+       u32 offset,
+       u32 xfer_len)
+{
+       int rc;
+
+       spin_lock_bh(&cmd->r2t_lock);
+       rc = iscsit_add_r2t_to_list(cmd, offset, xfer_len, 1, 0);
+       spin_unlock_bh(&cmd->r2t_lock);
+
+       return rc;
+}
+
+/*
+ * Scan the ending DataOUT sequence for PDUs that never arrived and batch
+ * contiguous missing ranges into recovery R2Ts.
+ *
+ * Returns DATAOUT_NORMAL when every PDU arrived OK,
+ * DATAOUT_WITHIN_COMMAND_RECOVERY when recovery is still pending, or
+ * DATAOUT_CANNOT_RECOVER on hard failure.
+ *
+ * Note: the previous version accumulated an "xfer_len" total that was
+ * never read; the dead variable has been removed.
+ */
+int iscsit_dataout_datapduinorder_no_fbit(
+       struct iscsi_cmd *cmd,
+       struct iscsi_pdu *pdu)
+{
+       int i, send_recovery_r2t = 0, recovery = 0;
+       u32 length = 0, offset = 0, pdu_count = 0;
+       struct iscsi_conn *conn = cmd->conn;
+       struct iscsi_pdu *first_pdu = NULL;
+
+       /*
+        * Get an struct iscsi_pdu pointer to the first PDU, and total PDU count
+        * of the DataOUT sequence.
+        */
+       if (conn->sess->sess_ops->DataSequenceInOrder) {
+               for (i = 0; i < cmd->pdu_count; i++) {
+                       if (cmd->pdu_list[i].seq_no == pdu->seq_no) {
+                               if (!first_pdu)
+                                       first_pdu = &cmd->pdu_list[i];
+                               pdu_count++;
+                       } else if (pdu_count)
+                               break;
+               }
+       } else {
+               struct iscsi_seq *seq = cmd->seq_ptr;
+
+               first_pdu = &cmd->pdu_list[seq->pdu_start];
+               pdu_count = seq->pdu_count;
+       }
+
+       if (!first_pdu || !pdu_count)
+               return DATAOUT_CANNOT_RECOVER;
+
+       /*
+        * Loop through the ending DataOUT Sequence checking each struct iscsi_pdu.
+        * The following ugly logic does batching of not received PDUs.
+        */
+       for (i = 0; i < pdu_count; i++) {
+               if (first_pdu[i].status == ISCSI_PDU_RECEIVED_OK) {
+                       if (!send_recovery_r2t)
+                               continue;
+
+                       /* A received PDU ends the current missing range. */
+                       if (iscsit_send_recovery_r2t(cmd, offset, length) < 0)
+                               return DATAOUT_CANNOT_RECOVER;
+
+                       send_recovery_r2t = length = offset = 0;
+                       continue;
+               }
+               /*
+                * Set recovery = 1 for any missing, CRC failed, or timed
+                * out PDUs to let the DataOUT logic know that this sequence
+                * has not been completed yet.
+                *
+                * Also, only send a Recovery R2T for ISCSI_PDU_NOT_RECEIVED.
+                * We assume if the PDU either failed CRC or timed out
+                * that a Recovery R2T has already been sent.
+                */
+               recovery = 1;
+
+               if (first_pdu[i].status != ISCSI_PDU_NOT_RECEIVED)
+                       continue;
+
+               if (!offset)
+                       offset = first_pdu[i].offset;
+               length += first_pdu[i].length;
+
+               send_recovery_r2t = 1;
+       }
+
+       /* Flush a missing range that ran to the end of the sequence. */
+       if (send_recovery_r2t)
+               if (iscsit_send_recovery_r2t(cmd, offset, length) < 0)
+                       return DATAOUT_CANNOT_RECOVER;
+
+       return (!recovery) ? DATAOUT_NORMAL : DATAOUT_WITHIN_COMMAND_RECOVERY;
+}
+
+/*
+ * Reset a command's DataOUT accounting after a failed or timed-out
+ * sequence and compute the (offset, length) window that a recovery R2T
+ * must request again.  Returns 0 on success, -1 when the sequence
+ * holder for the PDU cannot be located.
+ */
+static int iscsit_recalculate_dataout_values(
+       struct iscsi_cmd *cmd,
+       u32 pdu_offset,
+       u32 pdu_length,
+       u32 *r2t_offset,
+       u32 *r2t_length)
+{
+       int i;
+       struct iscsi_conn *conn = cmd->conn;
+       struct iscsi_pdu *pdu = NULL;
+
+       if (conn->sess->sess_ops->DataSequenceInOrder) {
+               cmd->data_sn = 0;
+
+               /* In-order PDUs: rerequest everything not yet written. */
+               if (conn->sess->sess_ops->DataPDUInOrder) {
+                       *r2t_offset = cmd->write_data_done;
+                       *r2t_length = (cmd->seq_end_offset -
+                                       cmd->write_data_done);
+                       return 0;
+               }
+
+               *r2t_offset = cmd->seq_start_offset;
+               *r2t_length = (cmd->seq_end_offset - cmd->seq_start_offset);
+
+               /*
+                * Out-of-order PDUs: the whole sequence is rerequested, so
+                * roll back burst/write counters for PDUs already received
+                * inside this sequence and mark them not received again.
+                */
+               for (i = 0; i < cmd->pdu_count; i++) {
+                       pdu = &cmd->pdu_list[i];
+
+                       if (pdu->status != ISCSI_PDU_RECEIVED_OK)
+                               continue;
+
+                       if ((pdu->offset >= cmd->seq_start_offset) &&
+                          ((pdu->offset + pdu->length) <=
+                            cmd->seq_end_offset)) {
+                               if (!cmd->unsolicited_data)
+                                       cmd->next_burst_len -= pdu->length;
+                               else
+                                       cmd->first_burst_len -= pdu->length;
+
+                               cmd->write_data_done -= pdu->length;
+                               pdu->status = ISCSI_PDU_NOT_RECEIVED;
+                       }
+               }
+       } else {
+               struct iscsi_seq *seq = NULL;
+
+               seq = iscsit_get_seq_holder(cmd, pdu_offset, pdu_length);
+               if (!seq)
+                       return -1;
+
+               *r2t_offset = seq->orig_offset;
+               *r2t_length = seq->xfer_len;
+
+               /* Rewind the sequence back to its original offset. */
+               cmd->write_data_done -= (seq->offset - seq->orig_offset);
+               if (cmd->immediate_data)
+                       cmd->first_burst_len = cmd->write_data_done;
+
+               seq->data_sn = 0;
+               seq->offset = seq->orig_offset;
+               seq->next_burst_len = 0;
+               seq->status = DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY;
+
+               if (conn->sess->sess_ops->DataPDUInOrder)
+                       return 0;
+
+               /* Out-of-order PDUs inside this sequence also restart. */
+               for (i = 0; i < seq->pdu_count; i++) {
+                       pdu = &cmd->pdu_list[i+seq->pdu_start];
+
+                       if (pdu->status != ISCSI_PDU_RECEIVED_OK)
+                               continue;
+
+                       pdu->status = ISCSI_PDU_NOT_RECEIVED;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Put @cmd into within-command recovery and request retransmission of
+ * the affected DataOUT range via a recovery R2T.
+ */
+int iscsit_recover_dataout_sequence(
+       struct iscsi_cmd *cmd,
+       u32 pdu_offset,
+       u32 pdu_length)
+{
+       u32 offset = 0, length = 0;
+
+       spin_lock_bh(&cmd->istate_lock);
+       cmd->cmd_flags |= ICF_WITHIN_COMMAND_RECOVERY;
+       spin_unlock_bh(&cmd->istate_lock);
+
+       if (iscsit_recalculate_dataout_values(cmd, pdu_offset, pdu_length,
+                       &offset, &length) < 0)
+               return DATAOUT_CANNOT_RECOVER;
+
+       iscsit_send_recovery_r2t(cmd, offset, length);
+
+       return DATAOUT_WITHIN_COMMAND_RECOVERY;
+}
+
+/*
+ * Allocate and initialize one out-of-order CmdSN tracking entry from
+ * the lio_ooo_cache slab.  Returns NULL on allocation failure.
+ */
+static struct iscsi_ooo_cmdsn *iscsit_allocate_ooo_cmdsn(void)
+{
+       struct iscsi_ooo_cmdsn *ooo_cmdsn =
+               kmem_cache_zalloc(lio_ooo_cache, GFP_ATOMIC);
+
+       if (!ooo_cmdsn) {
+               pr_err("Unable to allocate memory for"
+                       " struct iscsi_ooo_cmdsn.\n");
+               return NULL;
+       }
+
+       INIT_LIST_HEAD(&ooo_cmdsn->ooo_list);
+       return ooo_cmdsn;
+}
+
+/*
+ *     Called with sess->cmdsn_mutex held.
+ */
+static int iscsit_attach_ooo_cmdsn(
+       struct iscsi_session *sess,
+       struct iscsi_ooo_cmdsn *ooo_cmdsn)
+{
+       struct iscsi_ooo_cmdsn *ooo_tail, *ooo_tmp;
+       /*
+        * We attach the struct iscsi_ooo_cmdsn entry to the out of order
+        * list in increasing CmdSN order.
+        * This allows iscsi_execute_ooo_cmdsns() to detect any
+        * additional CmdSN holes while performing delayed execution.
+        */
+       if (list_empty(&sess->sess_ooo_cmdsn_list))
+               list_add_tail(&ooo_cmdsn->ooo_list,
+                               &sess->sess_ooo_cmdsn_list);
+       else {
+               ooo_tail = list_entry(sess->sess_ooo_cmdsn_list.prev,
+                               typeof(*ooo_tail), ooo_list);
+               /*
+                * CmdSN is greater than the tail of the list.
+                */
+               if (ooo_tail->cmdsn < ooo_cmdsn->cmdsn)
+                       list_add_tail(&ooo_cmdsn->ooo_list,
+                                       &sess->sess_ooo_cmdsn_list);
+               else {
+                       /*
+                        * CmdSN is either lower than the head,  or somewhere
+                        * in the middle.  Skip entries with a smaller CmdSN
+                        * and insert in front of the first entry whose CmdSN
+                        * is greater than or equal to ours.
+                        *
+                        * The previous "while (...) continue;" spun forever
+                        * here (nothing in the body changed the condition),
+                        * and list_add() onto &ooo_tmp->ooo_list placed the
+                        * new entry *after* the larger one, breaking the
+                        * ascending sort this comment promises.
+                        */
+                       list_for_each_entry(ooo_tmp, &sess->sess_ooo_cmdsn_list,
+                                               ooo_list) {
+                               if (ooo_tmp->cmdsn < ooo_cmdsn->cmdsn)
+                                       continue;
+
+                               /* Insert before the first >= entry. */
+                               list_add(&ooo_cmdsn->ooo_list,
+                                       ooo_tmp->ooo_list.prev);
+                               break;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+/*
+ *     Removes an struct iscsi_ooo_cmdsn from a session's list,
+ *     called with struct iscsi_session->cmdsn_mutex held.
+ */
+void iscsit_remove_ooo_cmdsn(
+       struct iscsi_session *sess,
+       struct iscsi_ooo_cmdsn *ooo_cmdsn)
+{
+       /* Unlink from sess->sess_ooo_cmdsn_list and return to the slab. */
+       list_del(&ooo_cmdsn->ooo_list);
+       kmem_cache_free(lio_ooo_cache, ooo_cmdsn);
+}
+
+/*
+ * Detach any out-of-order CmdSN entries belonging to @conn from their
+ * commands.  The entries stay queued so the CmdSN holes remain visible
+ * to delayed execution; only the command back-pointers are cleared.
+ */
+void iscsit_clear_ooo_cmdsns_for_conn(struct iscsi_conn *conn)
+{
+       struct iscsi_session *sess = conn->sess;
+       struct iscsi_ooo_cmdsn *ooo_cmdsn;
+
+       mutex_lock(&sess->cmdsn_mutex);
+       list_for_each_entry(ooo_cmdsn, &sess->sess_ooo_cmdsn_list, ooo_list) {
+               if (ooo_cmdsn->cid == conn->cid)
+                       ooo_cmdsn->cmd = NULL;
+       }
+       mutex_unlock(&sess->cmdsn_mutex);
+}
+
+/*
+ *     Called with sess->cmdsn_mutex held.
+ */
+/*
+ * Drain delayed commands in CmdSN order.  The list is kept sorted, so
+ * each iteration looks for the entry matching the current ExpCmdSN;
+ * anything else is still behind a hole and stays queued.  Returns the
+ * number of commands executed, or -1 if execution fails.
+ */
+int iscsit_execute_ooo_cmdsns(struct iscsi_session *sess)
+{
+       int ooo_count = 0;
+       struct iscsi_cmd *cmd = NULL;
+       struct iscsi_ooo_cmdsn *ooo_cmdsn, *ooo_cmdsn_tmp;
+
+       list_for_each_entry_safe(ooo_cmdsn, ooo_cmdsn_tmp,
+                               &sess->sess_ooo_cmdsn_list, ooo_list) {
+               if (ooo_cmdsn->cmdsn != sess->exp_cmd_sn)
+                       continue;
+
+               /* Entry whose connection went away: just advance ExpCmdSN. */
+               if (!ooo_cmdsn->cmd) {
+                       sess->exp_cmd_sn++;
+                       iscsit_remove_ooo_cmdsn(sess, ooo_cmdsn);
+                       continue;
+               }
+
+               cmd = ooo_cmdsn->cmd;
+               cmd->i_state = cmd->deferred_i_state;
+               ooo_count++;
+               sess->exp_cmd_sn++;
+               pr_debug("Executing out of order CmdSN: 0x%08x,"
+                       " incremented ExpCmdSN to 0x%08x.\n",
+                       cmd->cmd_sn, sess->exp_cmd_sn);
+
+               iscsit_remove_ooo_cmdsn(sess, ooo_cmdsn);
+
+               if (iscsit_execute_cmd(cmd, 1) < 0)
+                       return -1;
+       }
+
+       return ooo_count;
+}
+
+/*
+ *     Called either:
+ *
+ *     1. With sess->cmdsn_mutex held from iscsi_execute_ooo_cmdsns()
+ *     or iscsi_check_received_cmdsn().
+ *     2. With no locks held directly from iscsi_handle_XXX_pdu() functions
+ *     for immediate commands.
+ */
+/*
+ * Execute a command whose CmdSN is now in order (or an immediate
+ * command, per the comment above).  Takes cmd->istate_lock and releases
+ * it on every path before queueing a response or calling into the
+ * transport.  Returns 0 on success, -1 on an unrecoverable protocol
+ * error, or the underlying transport/logout handler's result.
+ */
+int iscsit_execute_cmd(struct iscsi_cmd *cmd, int ooo)
+{
+       struct se_cmd *se_cmd = &cmd->se_cmd;
+       int lr = 0;
+
+       spin_lock_bh(&cmd->istate_lock);
+       if (ooo)
+               cmd->cmd_flags &= ~ICF_OOO_CMDSN;
+
+       switch (cmd->iscsi_opcode) {
+       case ISCSI_OP_SCSI_CMD:
+               /*
+                * Go ahead and send the CHECK_CONDITION status for
+                * any SCSI CDB exceptions that may have occurred, also
+                * handle the SCF_SCSI_RESERVATION_CONFLICT case here as well.
+                */
+               if (se_cmd->se_cmd_flags & SCF_SCSI_CDB_EXCEPTION) {
+                       if (se_cmd->se_cmd_flags &
+                                       SCF_SCSI_RESERVATION_CONFLICT) {
+                               cmd->i_state = ISTATE_SEND_STATUS;
+                               spin_unlock_bh(&cmd->istate_lock);
+                               iscsit_add_cmd_to_response_queue(cmd, cmd->conn,
+                                               cmd->i_state);
+                               return 0;
+                       }
+                       spin_unlock_bh(&cmd->istate_lock);
+                       /*
+                        * Determine if delayed TASK_ABORTED status for WRITEs
+                        * should be sent now if no unsolicited data out
+                        * payloads are expected, or if the delayed status
+                        * should be sent after unsolicited data out with
+                        * ISCSI_FLAG_CMD_FINAL set in iscsi_handle_data_out()
+                        */
+                       if (transport_check_aborted_status(se_cmd,
+                                       (cmd->unsolicited_data == 0)) != 0)
+                               return 0;
+                       /*
+                        * Otherwise send CHECK_CONDITION and sense for
+                        * exception
+                        */
+                       return transport_send_check_condition_and_sense(se_cmd,
+                                       se_cmd->scsi_sense_reason, 0);
+               }
+               /*
+                * Special case for delayed CmdSN with Immediate
+                * Data and/or Unsolicited Data Out attached.
+                */
+               if (cmd->immediate_data) {
+                       if (cmd->cmd_flags & ICF_GOT_LAST_DATAOUT) {
+                               spin_unlock_bh(&cmd->istate_lock);
+                               return transport_generic_handle_data(
+                                               &cmd->se_cmd);
+                       }
+                       spin_unlock_bh(&cmd->istate_lock);
+
+                       if (!(cmd->cmd_flags &
+                                       ICF_NON_IMMEDIATE_UNSOLICITED_DATA)) {
+                               /*
+                                * Send the delayed TASK_ABORTED status for
+                                * WRITEs if no more unsolicited data is
+                                * expected.
+                                */
+                               if (transport_check_aborted_status(se_cmd, 1)
+                                               != 0)
+                                       return 0;
+
+                               iscsit_set_dataout_sequence_values(cmd);
+                               iscsit_build_r2ts_for_cmd(cmd, cmd->conn, 0);
+                       }
+                       return 0;
+               }
+               /*
+                * The default handler.
+                */
+               spin_unlock_bh(&cmd->istate_lock);
+
+               if ((cmd->data_direction == DMA_TO_DEVICE) &&
+                   !(cmd->cmd_flags & ICF_NON_IMMEDIATE_UNSOLICITED_DATA)) {
+                       /*
+                        * Send the delayed TASK_ABORTED status for WRITEs if
+                        * no more unsolicited data is expected.
+                        */
+                       if (transport_check_aborted_status(se_cmd, 1) != 0)
+                               return 0;
+
+                       iscsit_set_dataout_sequence_values(cmd);
+                       spin_lock_bh(&cmd->dataout_timeout_lock);
+                       iscsit_start_dataout_timer(cmd, cmd->conn);
+                       spin_unlock_bh(&cmd->dataout_timeout_lock);
+               }
+               return transport_handle_cdb_direct(&cmd->se_cmd);
+
+       case ISCSI_OP_NOOP_OUT:
+       case ISCSI_OP_TEXT:
+               spin_unlock_bh(&cmd->istate_lock);
+               iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state);
+               break;
+       case ISCSI_OP_SCSI_TMFUNC:
+               /* A failed TMR gets its response queued directly. */
+               if (se_cmd->se_cmd_flags & SCF_SCSI_CDB_EXCEPTION) {
+                       spin_unlock_bh(&cmd->istate_lock);
+                       iscsit_add_cmd_to_response_queue(cmd, cmd->conn,
+                                       cmd->i_state);
+                       return 0;
+               }
+               spin_unlock_bh(&cmd->istate_lock);
+
+               return transport_generic_handle_tmr(&cmd->se_cmd);
+       case ISCSI_OP_LOGOUT:
+               spin_unlock_bh(&cmd->istate_lock);
+               switch (cmd->logout_reason) {
+               case ISCSI_LOGOUT_REASON_CLOSE_SESSION:
+                       lr = iscsit_logout_closesession(cmd, cmd->conn);
+                       break;
+               case ISCSI_LOGOUT_REASON_CLOSE_CONNECTION:
+                       lr = iscsit_logout_closeconnection(cmd, cmd->conn);
+                       break;
+               case ISCSI_LOGOUT_REASON_RECOVERY:
+                       lr = iscsit_logout_removeconnforrecovery(cmd, cmd->conn);
+                       break;
+               default:
+                       pr_err("Unknown iSCSI Logout Request Code:"
+                               " 0x%02x\n", cmd->logout_reason);
+                       return -1;
+               }
+
+               return lr;
+       default:
+               spin_unlock_bh(&cmd->istate_lock);
+               pr_err("Cannot perform out of order execution for"
+               " unknown iSCSI Opcode: 0x%02x\n", cmd->iscsi_opcode);
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Release every pending out-of-order CmdSN entry for @sess, reusing
+ * iscsit_remove_ooo_cmdsn() which expects cmdsn_mutex to be held.
+ */
+void iscsit_free_all_ooo_cmdsns(struct iscsi_session *sess)
+{
+       struct iscsi_ooo_cmdsn *ooo_cmdsn, *ooo_cmdsn_tmp;
+
+       mutex_lock(&sess->cmdsn_mutex);
+       list_for_each_entry_safe(ooo_cmdsn, ooo_cmdsn_tmp,
+                       &sess->sess_ooo_cmdsn_list, ooo_list)
+               iscsit_remove_ooo_cmdsn(sess, ooo_cmdsn);
+       mutex_unlock(&sess->cmdsn_mutex);
+}
+
+/*
+ * Defer a command that arrived with a CmdSN above ExpCmdSN, queueing it
+ * on the session's out-of-order list until the hole fills.  Returns
+ * CMDSN_HIGHER_THAN_EXP on success, or CMDSN_ERROR_CANNOT_RECOVER on
+ * allocation/attach failure.
+ */
+int iscsit_handle_ooo_cmdsn(
+       struct iscsi_session *sess,
+       struct iscsi_cmd *cmd,
+       u32 cmdsn)
+{
+       int batch = 0;
+       struct iscsi_ooo_cmdsn *ooo_cmdsn = NULL, *ooo_tail = NULL;
+
+       /* Park the command; i_state is restored on delayed execution. */
+       cmd->deferred_i_state           = cmd->i_state;
+       cmd->i_state                    = ISTATE_DEFERRED_CMD;
+       cmd->cmd_flags                  |= ICF_OOO_CMDSN;
+
+       /*
+        * A new batch starts when this CmdSN is not contiguous with the
+        * tail of the out-of-order list.
+        */
+       if (list_empty(&sess->sess_ooo_cmdsn_list))
+               batch = 1;
+       else {
+               ooo_tail = list_entry(sess->sess_ooo_cmdsn_list.prev,
+                               typeof(*ooo_tail), ooo_list);
+               if (ooo_tail->cmdsn != (cmdsn - 1))
+                       batch = 1;
+       }
+
+       ooo_cmdsn = iscsit_allocate_ooo_cmdsn();
+       if (!ooo_cmdsn)
+               return CMDSN_ERROR_CANNOT_RECOVER;
+
+       ooo_cmdsn->cmd                  = cmd;
+       ooo_cmdsn->batch_count          = (batch) ?
+                                         (cmdsn - sess->exp_cmd_sn) : 1;
+       ooo_cmdsn->cid                  = cmd->conn->cid;
+       ooo_cmdsn->exp_cmdsn            = sess->exp_cmd_sn;
+       ooo_cmdsn->cmdsn                = cmdsn;
+
+       if (iscsit_attach_ooo_cmdsn(sess, ooo_cmdsn) < 0) {
+               kmem_cache_free(lio_ooo_cache, ooo_cmdsn);
+               return CMDSN_ERROR_CANNOT_RECOVER;
+       }
+
+       return CMDSN_HIGHER_THAN_EXP;
+}
+
+/*
+ * Determine the (offset, length) of the DataOUT window that timed out:
+ * either the unsolicited first-burst window, or the first outstanding
+ * R2T that was sent but not completed.  Returns 0 on success, -1 when
+ * no incomplete sequence can be found.
+ */
+static int iscsit_set_dataout_timeout_values(
+       struct iscsi_cmd *cmd,
+       u32 *offset,
+       u32 *length)
+{
+       struct iscsi_conn *conn = cmd->conn;
+       struct iscsi_r2t *r2t;
+
+       /* Unsolicited data covers min(FirstBurstLength, data_length). */
+       if (cmd->unsolicited_data) {
+               *offset = 0;
+               *length = (conn->sess->sess_ops->FirstBurstLength >
+                          cmd->data_length) ?
+                          cmd->data_length :
+                          conn->sess->sess_ops->FirstBurstLength;
+               return 0;
+       }
+
+       spin_lock_bh(&cmd->r2t_lock);
+       if (list_empty(&cmd->cmd_r2t_list)) {
+               pr_err("cmd->cmd_r2t_list is empty!\n");
+               spin_unlock_bh(&cmd->r2t_lock);
+               return -1;
+       }
+
+       /* First sent, non-recovery, incomplete R2T defines the window. */
+       list_for_each_entry(r2t, &cmd->cmd_r2t_list, r2t_list) {
+               if (r2t->sent_r2t && !r2t->recovery_r2t && !r2t->seq_complete) {
+                       *offset = r2t->offset;
+                       *length = r2t->xfer_len;
+                       spin_unlock_bh(&cmd->r2t_lock);
+                       return 0;
+               }
+       }
+       spin_unlock_bh(&cmd->r2t_lock);
+
+       pr_err("Unable to locate any incomplete DataOUT"
+               " sequences for ITT: 0x%08x.\n", cmd->init_task_tag);
+
+       return -1;
+}
+
+/*
+ *     NOTE: Called from interrupt (timer) context.
+ */
+static void iscsit_handle_dataout_timeout(unsigned long data)
+{
+       u32 pdu_length = 0, pdu_offset = 0;
+       u32 r2t_length = 0, r2t_offset = 0;
+       struct iscsi_cmd *cmd = (struct iscsi_cmd *) data;
+       struct iscsi_conn *conn = cmd->conn;
+       struct iscsi_session *sess = NULL;
+       struct iscsi_node_attrib *na;
+
+       /* Pin the connection while the timer handler runs. */
+       iscsit_inc_conn_usage_count(conn);
+
+       spin_lock_bh(&cmd->dataout_timeout_lock);
+       /* Bail out if iscsit_stop_dataout_timer() is tearing us down. */
+       if (cmd->dataout_timer_flags & ISCSI_TF_STOP) {
+               spin_unlock_bh(&cmd->dataout_timeout_lock);
+               iscsit_dec_conn_usage_count(conn);
+               return;
+       }
+       cmd->dataout_timer_flags &= ~ISCSI_TF_RUNNING;
+       sess = conn->sess;
+       na = iscsit_tpg_get_node_attrib(sess);
+
+       /* ERL=0 offers no within-command recovery: drop the connection. */
+       if (!sess->sess_ops->ErrorRecoveryLevel) {
+               pr_debug("Unable to recover from DataOut timeout while"
+                       " in ERL=0.\n");
+               goto failure;
+       }
+
+       /* Give up after the node's configured number of retries. */
+       if (++cmd->dataout_timeout_retries == na->dataout_timeout_retries) {
+               pr_debug("Command ITT: 0x%08x exceeded max retries"
+                       " for DataOUT timeout %u, closing iSCSI connection.\n",
+                       cmd->init_task_tag, na->dataout_timeout_retries);
+               goto failure;
+       }
+
+       cmd->cmd_flags |= ICF_WITHIN_COMMAND_RECOVERY;
+
+       /* Work out which DataOUT window timed out so it can be redone. */
+       if (conn->sess->sess_ops->DataSequenceInOrder) {
+               if (conn->sess->sess_ops->DataPDUInOrder) {
+                       pdu_offset = cmd->write_data_done;
+                       if ((pdu_offset + (conn->sess->sess_ops->MaxBurstLength -
+                            cmd->next_burst_len)) > cmd->data_length)
+                               pdu_length = (cmd->data_length -
+                                       cmd->write_data_done);
+                       else
+                               pdu_length = (conn->sess->sess_ops->MaxBurstLength -
+                                               cmd->next_burst_len);
+               } else {
+                       pdu_offset = cmd->seq_start_offset;
+                       pdu_length = (cmd->seq_end_offset -
+                               cmd->seq_start_offset);
+               }
+       } else {
+               if (iscsit_set_dataout_timeout_values(cmd, &pdu_offset,
+                               &pdu_length) < 0)
+                       goto failure;
+       }
+
+       /* Roll back accounting and compute the recovery R2T window. */
+       if (iscsit_recalculate_dataout_values(cmd, pdu_offset, pdu_length,
+                       &r2t_offset, &r2t_length) < 0)
+               goto failure;
+
+       pr_debug("Command ITT: 0x%08x timed out waiting for"
+               " completion of %sDataOUT Sequence Offset: %u, Length: %u\n",
+               cmd->init_task_tag, (cmd->unsolicited_data) ? "Unsolicited " :
+               "", r2t_offset, r2t_length);
+
+       if (iscsit_send_recovery_r2t(cmd, r2t_offset, r2t_length) < 0)
+               goto failure;
+
+       /* Rearm for the retransmitted window. */
+       iscsit_start_dataout_timer(cmd, conn);
+       spin_unlock_bh(&cmd->dataout_timeout_lock);
+       iscsit_dec_conn_usage_count(conn);
+
+       return;
+
+failure:
+       /* Unrecoverable: force connection reinstatement. */
+       spin_unlock_bh(&cmd->dataout_timeout_lock);
+       iscsit_cause_connection_reinstatement(conn, 0);
+       iscsit_dec_conn_usage_count(conn);
+}
+
+/*
+ * Push the DataOUT timeout for @cmd further into the future; no-op if
+ * the timer is not currently running.
+ */
+void iscsit_mod_dataout_timer(struct iscsi_cmd *cmd)
+{
+       struct iscsi_conn *conn = cmd->conn;
+       struct iscsi_session *sess = conn->sess;
+       /* was "na = na = ..." — redundant double self-assignment */
+       struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+
+       spin_lock_bh(&cmd->dataout_timeout_lock);
+       if (!(cmd->dataout_timer_flags & ISCSI_TF_RUNNING)) {
+               spin_unlock_bh(&cmd->dataout_timeout_lock);
+               return;
+       }
+
+       mod_timer(&cmd->dataout_timer,
+               (get_jiffies_64() + na->dataout_timeout * HZ));
+       pr_debug("Updated DataOUT timer for ITT: 0x%08x",
+                       cmd->init_task_tag);
+       spin_unlock_bh(&cmd->dataout_timeout_lock);
+}
+
+/*
+ *     Called with cmd->dataout_timeout_lock held.
+ */
+void iscsit_start_dataout_timer(
+       struct iscsi_cmd *cmd,
+       struct iscsi_conn *conn)
+{
+       struct iscsi_session *sess = conn->sess;
+       /* was "na = na = ..." — redundant double self-assignment */
+       struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+
+       if (cmd->dataout_timer_flags & ISCSI_TF_RUNNING)
+               return;
+
+       pr_debug("Starting DataOUT timer for ITT: 0x%08x on"
+               " CID: %hu.\n", cmd->init_task_tag, conn->cid);
+
+       init_timer(&cmd->dataout_timer);
+       cmd->dataout_timer.expires = (get_jiffies_64() + na->dataout_timeout * HZ);
+       cmd->dataout_timer.data = (unsigned long)cmd;
+       cmd->dataout_timer.function = iscsit_handle_dataout_timeout;
+       cmd->dataout_timer_flags &= ~ISCSI_TF_STOP;
+       cmd->dataout_timer_flags |= ISCSI_TF_RUNNING;
+       add_timer(&cmd->dataout_timer);
+}
+
+/*
+ * Stop the DataOUT timer and wait for a running handler to finish.
+ * ISCSI_TF_STOP is set under the lock *before* del_timer_sync() so a
+ * concurrently firing iscsit_handle_dataout_timeout() bails out early
+ * instead of rearming the timer.
+ */
+void iscsit_stop_dataout_timer(struct iscsi_cmd *cmd)
+{
+       spin_lock_bh(&cmd->dataout_timeout_lock);
+       if (!(cmd->dataout_timer_flags & ISCSI_TF_RUNNING)) {
+               spin_unlock_bh(&cmd->dataout_timeout_lock);
+               return;
+       }
+       cmd->dataout_timer_flags |= ISCSI_TF_STOP;
+       spin_unlock_bh(&cmd->dataout_timeout_lock);
+
+       /* Must not hold dataout_timeout_lock here: the handler takes it. */
+       del_timer_sync(&cmd->dataout_timer);
+
+       spin_lock_bh(&cmd->dataout_timeout_lock);
+       cmd->dataout_timer_flags &= ~ISCSI_TF_RUNNING;
+       pr_debug("Stopped DataOUT Timer for ITT: 0x%08x\n",
+                       cmd->init_task_tag);
+       spin_unlock_bh(&cmd->dataout_timeout_lock);
+}
diff --git a/drivers/target/iscsi/iscsi_target_erl1.h b/drivers/target/iscsi/iscsi_target_erl1.h
new file mode 100644 (file)
index 0000000..85e67e2
--- /dev/null
@@ -0,0 +1,26 @@
+#ifndef ISCSI_TARGET_ERL1_H
+#define ISCSI_TARGET_ERL1_H
+
+/* Error recovery level one (ERL=1) support for the iSCSI target. */
+
+/* DataIN / R2T SNACK recovery handling. */
+extern int iscsit_dump_data_payload(struct iscsi_conn *, u32, int);
+extern int iscsit_create_recovery_datain_values_datasequenceinorder_yes(
+                       struct iscsi_cmd *, struct iscsi_datain_req *);
+extern int iscsit_create_recovery_datain_values_datasequenceinorder_no(
+                       struct iscsi_cmd *, struct iscsi_datain_req *);
+extern int iscsit_handle_recovery_datain_or_r2t(struct iscsi_conn *, unsigned char *,
+                       u32, u32, u32, u32);
+extern int iscsit_handle_status_snack(struct iscsi_conn *, u32, u32,
+                       u32, u32);
+extern int iscsit_handle_data_ack(struct iscsi_conn *, u32, u32, u32);
+extern int iscsit_dataout_datapduinorder_no_fbit(struct iscsi_cmd *, struct iscsi_pdu *);
+extern int iscsit_recover_dataout_sequence(struct iscsi_cmd *, u32, u32);
+/* Out-of-order CmdSN bookkeeping and delayed execution. */
+extern void iscsit_clear_ooo_cmdsns_for_conn(struct iscsi_conn *);
+extern void iscsit_free_all_ooo_cmdsns(struct iscsi_session *);
+extern int iscsit_execute_ooo_cmdsns(struct iscsi_session *);
+extern int iscsit_execute_cmd(struct iscsi_cmd *, int);
+extern int iscsit_handle_ooo_cmdsn(struct iscsi_session *, struct iscsi_cmd *, u32);
+extern void iscsit_remove_ooo_cmdsn(struct iscsi_session *, struct iscsi_ooo_cmdsn *);
+/* DataOUT timeout timer management. */
+extern void iscsit_mod_dataout_timer(struct iscsi_cmd *);
+extern void iscsit_start_dataout_timer(struct iscsi_cmd *, struct iscsi_conn *);
+extern void iscsit_stop_dataout_timer(struct iscsi_cmd *);
+
+#endif /* ISCSI_TARGET_ERL1_H */
diff --git a/drivers/target/iscsi/iscsi_target_erl2.c b/drivers/target/iscsi/iscsi_target_erl2.c
new file mode 100644 (file)
index 0000000..91a4d17
--- /dev/null
@@ -0,0 +1,474 @@
+/*******************************************************************************
+ * This file contains error recovery level two functions used by
+ * the iSCSI Target driver.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_datain_values.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_erl2.h"
+#include "iscsi_target.h"
+
+/*
+ *     FIXME: Does RData SNACK apply here as well?
+ */
+void iscsit_create_conn_recovery_datain_values(
+       struct iscsi_cmd *cmd,
+       u32 exp_data_sn)
+{
+       u32 data_sn = 0;
+       struct iscsi_conn *conn = cmd->conn;
+
+       cmd->next_burst_len = 0;
+       cmd->read_data_done = 0;
+
+       while (exp_data_sn > data_sn) {
+               if ((cmd->next_burst_len +
+                    conn->conn_ops->MaxRecvDataSegmentLength) <
+                    conn->sess->sess_ops->MaxBurstLength) {
+                       cmd->read_data_done +=
+                              conn->conn_ops->MaxRecvDataSegmentLength;
+                       cmd->next_burst_len +=
+                              conn->conn_ops->MaxRecvDataSegmentLength;
+               } else {
+                       cmd->read_data_done +=
+                               (conn->sess->sess_ops->MaxBurstLength -
+                               cmd->next_burst_len);
+                       cmd->next_burst_len = 0;
+               }
+               data_sn++;
+       }
+}
+
+void iscsit_create_conn_recovery_dataout_values(
+       struct iscsi_cmd *cmd)
+{
+       u32 write_data_done = 0;
+       struct iscsi_conn *conn = cmd->conn;
+
+       cmd->data_sn = 0;
+       cmd->next_burst_len = 0;
+
+       while (cmd->write_data_done > write_data_done) {
+               if ((write_data_done + conn->sess->sess_ops->MaxBurstLength) <=
+                    cmd->write_data_done)
+                       write_data_done += conn->sess->sess_ops->MaxBurstLength;
+               else
+                       break;
+       }
+
+       cmd->write_data_done = write_data_done;
+}
+
+static int iscsit_attach_active_connection_recovery_entry(
+       struct iscsi_session *sess,
+       struct iscsi_conn_recovery *cr)
+{
+       spin_lock(&sess->cr_a_lock);
+       list_add_tail(&cr->cr_list, &sess->cr_active_list);
+       spin_unlock(&sess->cr_a_lock);
+
+       return 0;
+}
+
+static int iscsit_attach_inactive_connection_recovery_entry(
+       struct iscsi_session *sess,
+       struct iscsi_conn_recovery *cr)
+{
+       spin_lock(&sess->cr_i_lock);
+       list_add_tail(&cr->cr_list, &sess->cr_inactive_list);
+
+       sess->conn_recovery_count++;
+       pr_debug("Incremented connection recovery count to %u for"
+               " SID: %u\n", sess->conn_recovery_count, sess->sid);
+       spin_unlock(&sess->cr_i_lock);
+
+       return 0;
+}
+
+struct iscsi_conn_recovery *iscsit_get_inactive_connection_recovery_entry(
+       struct iscsi_session *sess,
+       u16 cid)
+{
+       struct iscsi_conn_recovery *cr;
+
+       spin_lock(&sess->cr_i_lock);
+       list_for_each_entry(cr, &sess->cr_inactive_list, cr_list) {
+               if (cr->cid == cid) {
+                       spin_unlock(&sess->cr_i_lock);
+                       return cr;
+               }
+       }
+       spin_unlock(&sess->cr_i_lock);
+
+       return NULL;
+}
+
+void iscsit_free_connection_recovery_entires(struct iscsi_session *sess)
+{
+       struct iscsi_cmd *cmd, *cmd_tmp;
+       struct iscsi_conn_recovery *cr, *cr_tmp;
+
+       spin_lock(&sess->cr_a_lock);
+       list_for_each_entry_safe(cr, cr_tmp, &sess->cr_active_list, cr_list) {
+               list_del(&cr->cr_list);
+               spin_unlock(&sess->cr_a_lock);
+
+               spin_lock(&cr->conn_recovery_cmd_lock);
+               list_for_each_entry_safe(cmd, cmd_tmp,
+                               &cr->conn_recovery_cmd_list, i_list) {
+
+                       list_del(&cmd->i_list);
+                       cmd->conn = NULL;
+                       spin_unlock(&cr->conn_recovery_cmd_lock);
+                       if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) ||
+                           !(cmd->se_cmd.transport_wait_for_tasks))
+                               iscsit_release_cmd(cmd);
+                       else
+                               cmd->se_cmd.transport_wait_for_tasks(
+                                               &cmd->se_cmd, 1, 1);
+                       spin_lock(&cr->conn_recovery_cmd_lock);
+               }
+               spin_unlock(&cr->conn_recovery_cmd_lock);
+               spin_lock(&sess->cr_a_lock);
+
+               kfree(cr);
+       }
+       spin_unlock(&sess->cr_a_lock);
+
+       spin_lock(&sess->cr_i_lock);
+       list_for_each_entry_safe(cr, cr_tmp, &sess->cr_inactive_list, cr_list) {
+               list_del(&cr->cr_list);
+               spin_unlock(&sess->cr_i_lock);
+
+               spin_lock(&cr->conn_recovery_cmd_lock);
+               list_for_each_entry_safe(cmd, cmd_tmp,
+                               &cr->conn_recovery_cmd_list, i_list) {
+
+                       list_del(&cmd->i_list);
+                       cmd->conn = NULL;
+                       spin_unlock(&cr->conn_recovery_cmd_lock);
+                       if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) ||
+                           !(cmd->se_cmd.transport_wait_for_tasks))
+                               iscsit_release_cmd(cmd);
+                       else
+                               cmd->se_cmd.transport_wait_for_tasks(
+                                               &cmd->se_cmd, 1, 1);
+                       spin_lock(&cr->conn_recovery_cmd_lock);
+               }
+               spin_unlock(&cr->conn_recovery_cmd_lock);
+               spin_lock(&sess->cr_i_lock);
+
+               kfree(cr);
+       }
+       spin_unlock(&sess->cr_i_lock);
+}
+
+int iscsit_remove_active_connection_recovery_entry(
+       struct iscsi_conn_recovery *cr,
+       struct iscsi_session *sess)
+{
+       spin_lock(&sess->cr_a_lock);
+       list_del(&cr->cr_list);
+
+       sess->conn_recovery_count--;
+       pr_debug("Decremented connection recovery count to %u for"
+               " SID: %u\n", sess->conn_recovery_count, sess->sid);
+       spin_unlock(&sess->cr_a_lock);
+
+       kfree(cr);
+
+       return 0;
+}
+
+int iscsit_remove_inactive_connection_recovery_entry(
+       struct iscsi_conn_recovery *cr,
+       struct iscsi_session *sess)
+{
+       spin_lock(&sess->cr_i_lock);
+       list_del(&cr->cr_list);
+       spin_unlock(&sess->cr_i_lock);
+
+       return 0;
+}
+
+/*
+ *     Called with cr->conn_recovery_cmd_lock held.
+ */
+int iscsit_remove_cmd_from_connection_recovery(
+       struct iscsi_cmd *cmd,
+       struct iscsi_session *sess)
+{
+       struct iscsi_conn_recovery *cr;
+
+       if (!cmd->cr) {
+               pr_err("struct iscsi_conn_recovery pointer for ITT: 0x%08x"
+                       " is NULL!\n", cmd->init_task_tag);
+               BUG();
+       }
+       cr = cmd->cr;
+
+       list_del(&cmd->i_list);
+       return --cr->cmd_count;
+}
+
+void iscsit_discard_cr_cmds_by_expstatsn(
+       struct iscsi_conn_recovery *cr,
+       u32 exp_statsn)
+{
+       u32 dropped_count = 0;
+       struct iscsi_cmd *cmd, *cmd_tmp;
+       struct iscsi_session *sess = cr->sess;
+
+       spin_lock(&cr->conn_recovery_cmd_lock);
+       list_for_each_entry_safe(cmd, cmd_tmp,
+                       &cr->conn_recovery_cmd_list, i_list) {
+
+               if (((cmd->deferred_i_state != ISTATE_SENT_STATUS) &&
+                    (cmd->deferred_i_state != ISTATE_REMOVE)) ||
+                    (cmd->stat_sn >= exp_statsn)) {
+                       continue;
+               }
+
+               dropped_count++;
+               pr_debug("Dropping Acknowledged ITT: 0x%08x, StatSN:"
+                       " 0x%08x, CID: %hu.\n", cmd->init_task_tag,
+                               cmd->stat_sn, cr->cid);
+
+               iscsit_remove_cmd_from_connection_recovery(cmd, sess);
+
+               spin_unlock(&cr->conn_recovery_cmd_lock);
+               if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) ||
+                   !(cmd->se_cmd.transport_wait_for_tasks))
+                       iscsit_release_cmd(cmd);
+               else
+                       cmd->se_cmd.transport_wait_for_tasks(
+                                       &cmd->se_cmd, 1, 0);
+               spin_lock(&cr->conn_recovery_cmd_lock);
+       }
+       spin_unlock(&cr->conn_recovery_cmd_lock);
+
+       pr_debug("Dropped %u total acknowledged commands on"
+               " CID: %hu less than old ExpStatSN: 0x%08x\n",
+                       dropped_count, cr->cid, exp_statsn);
+
+       if (!cr->cmd_count) {
+               pr_debug("No commands to be reassigned for failed"
+                       " connection CID: %hu on SID: %u\n",
+                       cr->cid, sess->sid);
+               iscsit_remove_inactive_connection_recovery_entry(cr, sess);
+               iscsit_attach_active_connection_recovery_entry(sess, cr);
+               pr_debug("iSCSI connection recovery successful for CID:"
+                       " %hu on SID: %u\n", cr->cid, sess->sid);
+               iscsit_remove_active_connection_recovery_entry(cr, sess);
+       } else {
+               iscsit_remove_inactive_connection_recovery_entry(cr, sess);
+               iscsit_attach_active_connection_recovery_entry(sess, cr);
+       }
+}
+
+int iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(struct iscsi_conn *conn)
+{
+       u32 dropped_count = 0;
+       struct iscsi_cmd *cmd, *cmd_tmp;
+       struct iscsi_ooo_cmdsn *ooo_cmdsn, *ooo_cmdsn_tmp;
+       struct iscsi_session *sess = conn->sess;
+
+       mutex_lock(&sess->cmdsn_mutex);
+       list_for_each_entry_safe(ooo_cmdsn, ooo_cmdsn_tmp,
+                       &sess->sess_ooo_cmdsn_list, ooo_list) {
+
+               if (ooo_cmdsn->cid != conn->cid)
+                       continue;
+
+               dropped_count++;
+               pr_debug("Dropping unacknowledged CmdSN:"
+               " 0x%08x during connection recovery on CID: %hu\n",
+                       ooo_cmdsn->cmdsn, conn->cid);
+               iscsit_remove_ooo_cmdsn(sess, ooo_cmdsn);
+       }
+       mutex_unlock(&sess->cmdsn_mutex);
+
+       spin_lock_bh(&conn->cmd_lock);
+       list_for_each_entry_safe(cmd, cmd_tmp, &conn->conn_cmd_list, i_list) {
+               if (!(cmd->cmd_flags & ICF_OOO_CMDSN))
+                       continue;
+
+               list_del(&cmd->i_list);
+
+               spin_unlock_bh(&conn->cmd_lock);
+               if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) ||
+                   !(cmd->se_cmd.transport_wait_for_tasks))
+                       iscsit_release_cmd(cmd);
+               else
+                       cmd->se_cmd.transport_wait_for_tasks(
+                                       &cmd->se_cmd, 1, 1);
+               spin_lock_bh(&conn->cmd_lock);
+       }
+       spin_unlock_bh(&conn->cmd_lock);
+
+       pr_debug("Dropped %u total unacknowledged commands on CID:"
+               " %hu for ExpCmdSN: 0x%08x.\n", dropped_count, conn->cid,
+                               sess->exp_cmd_sn);
+       return 0;
+}
+
+int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *conn)
+{
+       u32 cmd_count = 0;
+       struct iscsi_cmd *cmd, *cmd_tmp;
+       struct iscsi_conn_recovery *cr;
+
+       /*
+        * Allocate a struct iscsi_conn_recovery for this connection.
+        * Each struct iscsi_cmd contains a struct iscsi_conn_recovery pointer
+        * (struct iscsi_cmd->cr) so we need to allocate this before preparing the
+        * connection's command list for connection recovery.
+        */
+       cr = kzalloc(sizeof(struct iscsi_conn_recovery), GFP_KERNEL);
+       if (!cr) {
+               pr_err("Unable to allocate memory for"
+                       " struct iscsi_conn_recovery.\n");
+               return -1;
+       }
+       INIT_LIST_HEAD(&cr->cr_list);
+       INIT_LIST_HEAD(&cr->conn_recovery_cmd_list);
+       spin_lock_init(&cr->conn_recovery_cmd_lock);
+       /*
+        * Only perform connection recovery on ISCSI_OP_SCSI_CMD or
+        * ISCSI_OP_NOOP_OUT opcodes.  For all other opcodes call
+        * list_del(&cmd->i_list); to release the command to the
+        * session pool and remove it from the connection's list.
+        *
+        * Also stop the DataOUT timer, which will be restarted after
+        * sending the TMR response.
+        */
+       spin_lock_bh(&conn->cmd_lock);
+       list_for_each_entry_safe(cmd, cmd_tmp, &conn->conn_cmd_list, i_list) {
+
+               if ((cmd->iscsi_opcode != ISCSI_OP_SCSI_CMD) &&
+                   (cmd->iscsi_opcode != ISCSI_OP_NOOP_OUT)) {
+                       pr_debug("Not performing realligence on"
+                               " Opcode: 0x%02x, ITT: 0x%08x, CmdSN: 0x%08x,"
+                               " CID: %hu\n", cmd->iscsi_opcode,
+                               cmd->init_task_tag, cmd->cmd_sn, conn->cid);
+
+                       list_del(&cmd->i_list);
+                       spin_unlock_bh(&conn->cmd_lock);
+
+                       if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) ||
+                           !(cmd->se_cmd.transport_wait_for_tasks))
+                               iscsit_release_cmd(cmd);
+                       else
+                               cmd->se_cmd.transport_wait_for_tasks(
+                                               &cmd->se_cmd, 1, 0);
+                       spin_lock_bh(&conn->cmd_lock);
+                       continue;
+               }
+
+               /*
+                * Special case where commands greater than or equal to
+                * the session's ExpCmdSN are attached to the connection
+                * list but not to the out of order CmdSN list.  The one
+                * obvious case is when a command with immediate data
+                * attached must only check the CmdSN against ExpCmdSN
+                * after the data is received.  The special case below
+                * is when the connection fails before data is received,
+                * but also may apply to other PDUs, so it has been
+                * made generic here.
+                */
+               if (!(cmd->cmd_flags & ICF_OOO_CMDSN) && !cmd->immediate_cmd &&
+                    (cmd->cmd_sn >= conn->sess->exp_cmd_sn)) {
+                       list_del(&cmd->i_list);
+                       spin_unlock_bh(&conn->cmd_lock);
+
+                       if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) ||
+                           !(cmd->se_cmd.transport_wait_for_tasks))
+                               iscsit_release_cmd(cmd);
+                       else
+                               cmd->se_cmd.transport_wait_for_tasks(
+                                               &cmd->se_cmd, 1, 1);
+                       spin_lock_bh(&conn->cmd_lock);
+                       continue;
+               }
+
+               cmd_count++;
+               pr_debug("Preparing Opcode: 0x%02x, ITT: 0x%08x,"
+                       " CmdSN: 0x%08x, StatSN: 0x%08x, CID: %hu for"
+                       " realligence.\n", cmd->iscsi_opcode,
+                       cmd->init_task_tag, cmd->cmd_sn, cmd->stat_sn,
+                       conn->cid);
+
+               cmd->deferred_i_state = cmd->i_state;
+               cmd->i_state = ISTATE_IN_CONNECTION_RECOVERY;
+
+               if (cmd->data_direction == DMA_TO_DEVICE)
+                       iscsit_stop_dataout_timer(cmd);
+
+               cmd->sess = conn->sess;
+
+               list_del(&cmd->i_list);
+               spin_unlock_bh(&conn->cmd_lock);
+
+               iscsit_free_all_datain_reqs(cmd);
+
+               if ((cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) &&
+                    cmd->se_cmd.transport_wait_for_tasks)
+                       cmd->se_cmd.transport_wait_for_tasks(&cmd->se_cmd,
+                                       0, 0);
+               /*
+                * Add the struct iscsi_cmd to the connection recovery cmd list
+                */
+               spin_lock(&cr->conn_recovery_cmd_lock);
+               list_add_tail(&cmd->i_list, &cr->conn_recovery_cmd_list);
+               spin_unlock(&cr->conn_recovery_cmd_lock);
+
+               spin_lock_bh(&conn->cmd_lock);
+               cmd->cr = cr;
+               cmd->conn = NULL;
+       }
+       spin_unlock_bh(&conn->cmd_lock);
+       /*
+        * Fill in the various values in the preallocated struct iscsi_conn_recovery.
+        */
+       cr->cid = conn->cid;
+       cr->cmd_count = cmd_count;
+       cr->maxrecvdatasegmentlength = conn->conn_ops->MaxRecvDataSegmentLength;
+       cr->sess = conn->sess;
+
+       iscsit_attach_inactive_connection_recovery_entry(conn->sess, cr);
+
+       return 0;
+}
+
+int iscsit_connection_recovery_transport_reset(struct iscsi_conn *conn)
+{
+       atomic_set(&conn->connection_recovery, 1);
+
+       if (iscsit_close_connection(conn) < 0)
+               return -1;
+
+       return 0;
+}
diff --git a/drivers/target/iscsi/iscsi_target_erl2.h b/drivers/target/iscsi/iscsi_target_erl2.h
new file mode 100644 (file)
index 0000000..22f8d24
--- /dev/null
@@ -0,0 +1,18 @@
+#ifndef ISCSI_TARGET_ERL2_H
+#define ISCSI_TARGET_ERL2_H
+
+extern void iscsit_create_conn_recovery_datain_values(struct iscsi_cmd *, u32);
+extern void iscsit_create_conn_recovery_dataout_values(struct iscsi_cmd *);
+extern struct iscsi_conn_recovery *iscsit_get_inactive_connection_recovery_entry(
+                       struct iscsi_session *, u16);
+extern void iscsit_free_connection_recovery_entires(struct iscsi_session *);
+extern int iscsit_remove_active_connection_recovery_entry(
+                       struct iscsi_conn_recovery *, struct iscsi_session *);
+extern int iscsit_remove_cmd_from_connection_recovery(struct iscsi_cmd *,
+                       struct iscsi_session *);
+extern void iscsit_discard_cr_cmds_by_expstatsn(struct iscsi_conn_recovery *, u32);
+extern int iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(struct iscsi_conn *);
+extern int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *);
+extern int iscsit_connection_recovery_transport_reset(struct iscsi_conn *);
+
+#endif /*** ISCSI_TARGET_ERL2_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
new file mode 100644 (file)
index 0000000..bcaf82f
--- /dev/null
@@ -0,0 +1,1232 @@
+/*******************************************************************************
+ * This file contains the login functions used by the iSCSI Target driver.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/string.h>
+#include <linux/kthread.h>
+#include <linux/crypto.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_tq.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_nego.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_erl2.h"
+#include "iscsi_target_login.h"
+#include "iscsi_target_stat.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+#include "iscsi_target_parameters.h"
+
+extern struct idr sess_idr;
+extern struct mutex auth_id_lock;
+extern spinlock_t sess_idr_lock;
+
+static int iscsi_login_init_conn(struct iscsi_conn *conn)
+{
+       INIT_LIST_HEAD(&conn->conn_list);
+       INIT_LIST_HEAD(&conn->conn_cmd_list);
+       INIT_LIST_HEAD(&conn->immed_queue_list);
+       INIT_LIST_HEAD(&conn->response_queue_list);
+       init_completion(&conn->conn_post_wait_comp);
+       init_completion(&conn->conn_wait_comp);
+       init_completion(&conn->conn_wait_rcfr_comp);
+       init_completion(&conn->conn_waiting_on_uc_comp);
+       init_completion(&conn->conn_logout_comp);
+       init_completion(&conn->rx_half_close_comp);
+       init_completion(&conn->tx_half_close_comp);
+       spin_lock_init(&conn->cmd_lock);
+       spin_lock_init(&conn->conn_usage_lock);
+       spin_lock_init(&conn->immed_queue_lock);
+       spin_lock_init(&conn->nopin_timer_lock);
+       spin_lock_init(&conn->response_queue_lock);
+       spin_lock_init(&conn->state_lock);
+
+       if (!zalloc_cpumask_var(&conn->conn_cpumask, GFP_KERNEL)) {
+               pr_err("Unable to allocate conn->conn_cpumask\n");
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+/*
+ * Used by iscsi_target_nego.c:iscsi_target_locate_portal() to setup
+ * per struct iscsi_conn libcrypto contexts for crc32c and crc32-intel
+ */
+int iscsi_login_setup_crypto(struct iscsi_conn *conn)
+{
+       /*
+        * Setup slicing by CRC32C algorithm for RX and TX libcrypto contexts
+        * which will default to crc32c_intel.ko for cpu_has_xmm4_2, or fallback
+        * to software 1x8 byte slicing from crc32c.ko
+        */
+       conn->conn_rx_hash.flags = 0;
+       conn->conn_rx_hash.tfm = crypto_alloc_hash("crc32c", 0,
+                                               CRYPTO_ALG_ASYNC);
+       if (IS_ERR(conn->conn_rx_hash.tfm)) {
+               pr_err("crypto_alloc_hash() failed for conn_rx_tfm\n");
+               return -ENOMEM;
+       }
+
+       conn->conn_tx_hash.flags = 0;
+       conn->conn_tx_hash.tfm = crypto_alloc_hash("crc32c", 0,
+                                               CRYPTO_ALG_ASYNC);
+       if (IS_ERR(conn->conn_tx_hash.tfm)) {
+               pr_err("crypto_alloc_hash() failed for conn_tx_tfm\n");
+               crypto_free_hash(conn->conn_rx_hash.tfm);
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+static int iscsi_login_check_initiator_version(
+       struct iscsi_conn *conn,
+       u8 version_max,
+       u8 version_min)
+{
+       if ((version_max != 0x00) || (version_min != 0x00)) {
+               pr_err("Unsupported iSCSI IETF Pre-RFC Revision,"
+                       " version Min/Max 0x%02x/0x%02x, rejecting login.\n",
+                       version_min, version_max);
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+                               ISCSI_LOGIN_STATUS_NO_VERSION);
+               return -1;
+       }
+
+       return 0;
+}
+
+int iscsi_check_for_session_reinstatement(struct iscsi_conn *conn)
+{
+       int sessiontype;
+       struct iscsi_param *initiatorname_param = NULL, *sessiontype_param = NULL;
+       struct iscsi_portal_group *tpg = conn->tpg;
+       struct iscsi_session *sess = NULL, *sess_p = NULL;
+       struct se_portal_group *se_tpg = &tpg->tpg_se_tpg;
+       struct se_session *se_sess, *se_sess_tmp;
+
+       initiatorname_param = iscsi_find_param_from_key(
+                       INITIATORNAME, conn->param_list);
+       if (!initiatorname_param)
+               return -1;
+
+       sessiontype_param = iscsi_find_param_from_key(
+                       SESSIONTYPE, conn->param_list);
+       if (!sessiontype_param)
+               return -1;
+
+       sessiontype = (strncmp(sessiontype_param->value, NORMAL, 6)) ? 1 : 0;
+
+       spin_lock_bh(&se_tpg->session_lock);
+       list_for_each_entry_safe(se_sess, se_sess_tmp, &se_tpg->tpg_sess_list,
+                       sess_list) {
+
+               sess_p = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+               spin_lock(&sess_p->conn_lock);
+               if (atomic_read(&sess_p->session_fall_back_to_erl0) ||
+                   atomic_read(&sess_p->session_logout) ||
+                   (sess_p->time2retain_timer_flags & ISCSI_TF_EXPIRED)) {
+                       spin_unlock(&sess_p->conn_lock);
+                       continue;
+               }
+               if (!memcmp((void *)sess_p->isid, (void *)conn->sess->isid, 6) &&
+                  (!strcmp((void *)sess_p->sess_ops->InitiatorName,
+                           (void *)initiatorname_param->value) &&
+                  (sess_p->sess_ops->SessionType == sessiontype))) {
+                       atomic_set(&sess_p->session_reinstatement, 1);
+                       spin_unlock(&sess_p->conn_lock);
+                       iscsit_inc_session_usage_count(sess_p);
+                       iscsit_stop_time2retain_timer(sess_p);
+                       sess = sess_p;
+                       break;
+               }
+               spin_unlock(&sess_p->conn_lock);
+       }
+       spin_unlock_bh(&se_tpg->session_lock);
+       /*
+        * If the Time2Retain handler has expired, the session is already gone.
+        */
+       if (!sess)
+               return 0;
+
+       pr_debug("%s iSCSI Session SID %u is still active for %s,"
+               " preforming session reinstatement.\n", (sessiontype) ?
+               "Discovery" : "Normal", sess->sid,
+               sess->sess_ops->InitiatorName);
+
+       spin_lock_bh(&sess->conn_lock);
+       if (sess->session_state == TARG_SESS_STATE_FAILED) {
+               spin_unlock_bh(&sess->conn_lock);
+               iscsit_dec_session_usage_count(sess);
+               return iscsit_close_session(sess);
+       }
+       spin_unlock_bh(&sess->conn_lock);
+
+       iscsit_stop_session(sess, 1, 1);
+       iscsit_dec_session_usage_count(sess);
+
+       return iscsit_close_session(sess);
+}
+
+static void iscsi_login_set_conn_values(
+       struct iscsi_session *sess,
+       struct iscsi_conn *conn,
+       u16 cid)
+{
+       conn->sess              = sess;
+       conn->cid               = cid;
+       /*
+        * Generate a random Status sequence number (statsn) for the new
+        * iSCSI connection.
+        */
+       get_random_bytes(&conn->stat_sn, sizeof(u32));
+
+       mutex_lock(&auth_id_lock);
+       conn->auth_id           = iscsit_global->auth_id++;
+       mutex_unlock(&auth_id_lock);
+}
+
+/*
+ *     This is the leading connection of a new session,
+ *     or session reinstatement.
+ */
+static int iscsi_login_zero_tsih_s1(
+       struct iscsi_conn *conn,
+       unsigned char *buf)
+{
+       struct iscsi_session *sess = NULL;
+       struct iscsi_login_req *pdu = (struct iscsi_login_req *)buf;
+
+       sess = kzalloc(sizeof(struct iscsi_session), GFP_KERNEL);
+       if (!sess) {
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                               ISCSI_LOGIN_STATUS_NO_RESOURCES);
+               pr_err("Could not allocate memory for session\n");
+               return -1;
+       }
+
+       iscsi_login_set_conn_values(sess, conn, pdu->cid);
+       sess->init_task_tag     = pdu->itt;
+       memcpy((void *)&sess->isid, (void *)pdu->isid, 6);
+       sess->exp_cmd_sn        = pdu->cmdsn;
+       INIT_LIST_HEAD(&sess->sess_conn_list);
+       INIT_LIST_HEAD(&sess->sess_ooo_cmdsn_list);
+       INIT_LIST_HEAD(&sess->cr_active_list);
+       INIT_LIST_HEAD(&sess->cr_inactive_list);
+       init_completion(&sess->async_msg_comp);
+       init_completion(&sess->reinstatement_comp);
+       init_completion(&sess->session_wait_comp);
+       init_completion(&sess->session_waiting_on_uc_comp);
+       mutex_init(&sess->cmdsn_mutex);
+       spin_lock_init(&sess->conn_lock);
+       spin_lock_init(&sess->cr_a_lock);
+       spin_lock_init(&sess->cr_i_lock);
+       spin_lock_init(&sess->session_usage_lock);
+       spin_lock_init(&sess->ttt_lock);
+
+       if (!idr_pre_get(&sess_idr, GFP_KERNEL)) {
+               pr_err("idr_pre_get() for sess_idr failed\n");
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                               ISCSI_LOGIN_STATUS_NO_RESOURCES);
+               return -1;
+       }
+       spin_lock(&sess_idr_lock);
+       idr_get_new(&sess_idr, NULL, &sess->session_index);
+       spin_unlock(&sess_idr_lock);
+
+       sess->creation_time = get_jiffies_64();
+       spin_lock_init(&sess->session_stats_lock);
+       /*
+        * The FFP CmdSN window values will be allocated from the TPG's
+        * Initiator Node's ACL once the login has been successfully completed.
+        */
+       sess->max_cmd_sn        = pdu->cmdsn;
+
+       sess->sess_ops = kzalloc(sizeof(struct iscsi_sess_ops), GFP_KERNEL);
+       if (!sess->sess_ops) {
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                               ISCSI_LOGIN_STATUS_NO_RESOURCES);
+               pr_err("Unable to allocate memory for"
+                               " struct iscsi_sess_ops.\n");
+               return -1;
+       }
+
+       sess->se_sess = transport_init_session();
+       if (!sess->se_sess) {
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                               ISCSI_LOGIN_STATUS_NO_RESOURCES);
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Stage two of login for a leading connection (TSIH == 0): bind the new
+ * session to the connection's TPG, assign a TSIH, and build the parameter
+ * list to be negotiated.  On any failure a Login Response with a non-zero
+ * status class has already been queued; returns 0 on success, -1 on error.
+ */
+static int iscsi_login_zero_tsih_s2(
+       struct iscsi_conn *conn)
+{
+       struct iscsi_node_attrib *na;
+       struct iscsi_session *sess = conn->sess;
+       unsigned char buf[32];
+
+       sess->tpg = conn->tpg;
+
+       /*
+        * Assign a new TPG Session Handle.  Note this is protected with
+        * struct iscsi_portal_group->np_login_sem from iscsit_access_np().
+        */
+       sess->tsih = ++ISCSI_TPG_S(sess)->ntsih;
+       /* TSIH 0 is reserved to mean "new session"; skip it on wrap-around. */
+       if (!sess->tsih)
+               sess->tsih = ++ISCSI_TPG_S(sess)->ntsih;
+
+       /*
+        * Create the default params from user defined values..
+        */
+       if (iscsi_copy_param_list(&conn->param_list,
+                               ISCSI_TPG_C(conn)->param_list, 1) < 0) {
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                               ISCSI_LOGIN_STATUS_NO_RESOURCES);
+               return -1;
+       }
+
+       iscsi_set_keys_to_negotiate(0, conn->param_list);
+
+       /* Discovery sessions negotiate only a reduced key set. */
+       if (sess->sess_ops->SessionType)
+               return iscsi_set_keys_irrelevant_for_discovery(
+                               conn->param_list);
+
+       na = iscsit_tpg_get_node_attrib(sess);
+
+       /*
+        * Need to send TargetPortalGroupTag back in first login response
+        * on any iSCSI connection where the Initiator provides TargetName.
+        * See 5.3.1.  Login Phase Start
+        *
+        * In our case, we have already located the struct iscsi_tiqn at this point.
+        */
+       memset(buf, 0, 32);
+       sprintf(buf, "TargetPortalGroupTag=%hu", ISCSI_TPG_S(sess)->tpgt);
+       if (iscsi_change_param_value(buf, conn->param_list, 0) < 0) {
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                               ISCSI_LOGIN_STATUS_NO_RESOURCES);
+               return -1;
+       }
+
+       /*
+        * Workaround for Initiators that have broken connection recovery logic.
+        *
+        * "We would really like to get rid of this." Linux-iSCSI.org team
+        */
+       memset(buf, 0, 32);
+       sprintf(buf, "ErrorRecoveryLevel=%d", na->default_erl);
+       if (iscsi_change_param_value(buf, conn->param_list, 0) < 0) {
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                               ISCSI_LOGIN_STATUS_NO_RESOURCES);
+               return -1;
+       }
+
+       if (iscsi_login_disable_FIM_keys(conn->param_list, conn) < 0)
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Remove PSTATE_NEGOTIATE for the four FIM (Fixed Interval Marker) related
+ * keys (OFMarker, OFMarkInt, IFMarker, IFMarkInt).
+ * The Initiator node will be able to enable FIM by proposing them itself.
+ * Returns 0 on success; on a missing key, queues a Login Response with a
+ * target-error status class and returns -1.
+ */
+int iscsi_login_disable_FIM_keys(
+       struct iscsi_param_list *param_list,
+       struct iscsi_conn *conn)
+{
+       struct iscsi_param *param;
+
+       param = iscsi_find_param_from_key("OFMarker", param_list);
+       if (!param) {
+               pr_err("iscsi_find_param_from_key() for"
+                               " OFMarker failed\n");
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                               ISCSI_LOGIN_STATUS_NO_RESOURCES);
+               return -1;
+       }
+       param->state &= ~PSTATE_NEGOTIATE;
+
+       param = iscsi_find_param_from_key("OFMarkInt", param_list);
+       if (!param) {
+               pr_err("iscsi_find_param_from_key() for"
+                               " OFMarkInt failed\n");
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                               ISCSI_LOGIN_STATUS_NO_RESOURCES);
+               return -1;
+       }
+       param->state &= ~PSTATE_NEGOTIATE;
+
+       param = iscsi_find_param_from_key("IFMarker", param_list);
+       if (!param) {
+               pr_err("iscsi_find_param_from_key() for"
+                               " IFMarker failed\n");
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                               ISCSI_LOGIN_STATUS_NO_RESOURCES);
+               return -1;
+       }
+       param->state &= ~PSTATE_NEGOTIATE;
+
+       param = iscsi_find_param_from_key("IFMarkInt", param_list);
+       if (!param) {
+               pr_err("iscsi_find_param_from_key() for"
+                               " IFMarkInt failed\n");
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                               ISCSI_LOGIN_STATUS_NO_RESOURCES);
+               return -1;
+       }
+       param->state &= ~PSTATE_NEGOTIATE;
+
+       return 0;
+}
+
+/*
+ * Stage one of login for a non-leading connection (TSIH != 0): record the
+ * initiator-proposed CID on the connection.  The owning session is located
+ * later in iscsi_login_non_zero_tsih_s2(), hence the NULL session here.
+ * Always returns 0.
+ */
+static int iscsi_login_non_zero_tsih_s1(
+       struct iscsi_conn *conn,
+       unsigned char *buf)
+{
+       struct iscsi_login_req *pdu = (struct iscsi_login_req *)buf;
+
+       iscsi_login_set_conn_values(NULL, conn, pdu->cid);
+       return 0;
+}
+
+/*
+ *     Add a new connection to an existing session.
+ *
+ *     Locates the session by (ISID, TSIH) under se_tpg->session_lock,
+ *     takes a usage reference and stops its Time2Retain timer, then
+ *     builds the parameter list for this connection's negotiation.
+ *     Returns 0 on success; -1 after queueing an error Login Response.
+ */
+static int iscsi_login_non_zero_tsih_s2(
+       struct iscsi_conn *conn,
+       unsigned char *buf)
+{
+       struct iscsi_portal_group *tpg = conn->tpg;
+       struct iscsi_session *sess = NULL, *sess_p = NULL;
+       struct se_portal_group *se_tpg = &tpg->tpg_se_tpg;
+       struct se_session *se_sess, *se_sess_tmp;
+       struct iscsi_login_req *pdu = (struct iscsi_login_req *)buf;
+
+       spin_lock_bh(&se_tpg->session_lock);
+       list_for_each_entry_safe(se_sess, se_sess_tmp, &se_tpg->tpg_sess_list,
+                       sess_list) {
+
+               sess_p = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+               /* Skip sessions that are logging out or already expired. */
+               if (atomic_read(&sess_p->session_fall_back_to_erl0) ||
+                   atomic_read(&sess_p->session_logout) ||
+                  (sess_p->time2retain_timer_flags & ISCSI_TF_EXPIRED))
+                       continue;
+               /* Match on the 6-byte ISID plus the initiator-supplied TSIH. */
+               if (!memcmp((const void *)sess_p->isid,
+                    (const void *)pdu->isid, 6) &&
+                    (sess_p->tsih == pdu->tsih)) {
+                       iscsit_inc_session_usage_count(sess_p);
+                       iscsit_stop_time2retain_timer(sess_p);
+                       sess = sess_p;
+                       break;
+               }
+       }
+       spin_unlock_bh(&se_tpg->session_lock);
+
+       /*
+        * If the Time2Retain handler has expired, the session is already gone.
+        */
+       if (!sess) {
+               pr_err("Initiator attempting to add a connection to"
+                       " a non-existent session, rejecting iSCSI Login.\n");
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+                               ISCSI_LOGIN_STATUS_NO_SESSION);
+               return -1;
+       }
+
+       /*
+        * Stop the Time2Retain timer if this is a failed session, we restart
+        * the timer if the login is not successful.
+        */
+       spin_lock_bh(&sess->conn_lock);
+       if (sess->session_state == TARG_SESS_STATE_FAILED)
+               atomic_set(&sess->session_continuation, 1);
+       spin_unlock_bh(&sess->conn_lock);
+
+       iscsi_login_set_conn_values(sess, conn, pdu->cid);
+
+       if (iscsi_copy_param_list(&conn->param_list,
+                       ISCSI_TPG_C(conn)->param_list, 0) < 0) {
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                               ISCSI_LOGIN_STATUS_NO_RESOURCES);
+               return -1;
+       }
+
+       iscsi_set_keys_to_negotiate(0, conn->param_list);
+       /*
+        * Need to send TargetPortalGroupTag back in first login response
+        * on any iSCSI connection where the Initiator provides TargetName.
+        * See 5.3.1.  Login Phase Start
+        *
+        * In our case, we have already located the struct iscsi_tiqn at this point.
+        */
+       memset(buf, 0, 32);
+       sprintf(buf, "TargetPortalGroupTag=%hu", ISCSI_TPG_S(sess)->tpgt);
+       if (iscsi_change_param_value(buf, conn->param_list, 0) < 0) {
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                               ISCSI_LOGIN_STATUS_NO_RESOURCES);
+               return -1;
+       }
+
+       return iscsi_login_disable_FIM_keys(conn->param_list, conn);
+}
+
+/*
+ * Post-authentication handling for a connection joining an existing session:
+ * performs connection reinstatement for a duplicate CID, discards connection
+ * recovery state at ERL=2, and enforces MaxConnections.
+ * Returns 0 on success, -1 after queueing an error Login Response.
+ */
+int iscsi_login_post_auth_non_zero_tsih(
+       struct iscsi_conn *conn,
+       u16 cid,
+       u32 exp_statsn)
+{
+       struct iscsi_conn *conn_ptr = NULL;
+       struct iscsi_conn_recovery *cr = NULL;
+       struct iscsi_session *sess = conn->sess;
+
+       /*
+        * By following item 5 in the login table,  if we have found
+        * an existing ISID and a valid/existing TSIH and an existing
+        * CID we do connection reinstatement.  Currently we do not
+        * support it so we send back a non-zero status class to the
+        * initiator and release the new connection.
+        */
+       conn_ptr = iscsit_get_conn_from_cid_rcfr(sess, cid);
+       if ((conn_ptr)) {
+               pr_err("Connection exists with CID %hu for %s,"
+                       " performing connection reinstatement.\n",
+                       conn_ptr->cid, sess->sess_ops->InitiatorName);
+
+               iscsit_connection_reinstatement_rcfr(conn_ptr);
+               iscsit_dec_conn_usage_count(conn_ptr);
+       }
+
+       /*
+        * Check for any connection recovery entries containing CID.
+        * We use the original ExpStatSN sent in the first login request
+        * to acknowledge commands for the failed connection.
+        *
+        * Also note that an explicit logout may have already been sent,
+        * but the response may not be sent due to additional connection
+        * loss.
+        */
+       if (sess->sess_ops->ErrorRecoveryLevel == 2) {
+               cr = iscsit_get_inactive_connection_recovery_entry(
+                               sess, cid);
+               if ((cr)) {
+                       pr_debug("Performing implicit logout"
+                               " for connection recovery on CID: %hu\n",
+                                       conn->cid);
+                       iscsit_discard_cr_cmds_by_expstatsn(cr, exp_statsn);
+               }
+       }
+
+       /*
+        * Else we follow item 4 from the login table in that we have
+        * found an existing ISID and a valid/existing TSIH and a new
+        * CID we go ahead and continue to add a new connection to the
+        * session.
+        */
+       pr_debug("Adding CID %hu to existing session for %s.\n",
+                       cid, sess->sess_ops->InitiatorName);
+
+       if ((atomic_read(&sess->nconn) + 1) > sess->sess_ops->MaxConnections) {
+               pr_err("Adding additional connection to this session"
+                       " would exceed MaxConnections %d, login failed.\n",
+                               sess->sess_ops->MaxConnections);
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+                               ISCSI_LOGIN_STATUS_ISID_ERROR);
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Start per-connection timers after a successful login.  NOP-In pings are
+ * only armed for Normal sessions (SessionType == 0), not Discovery sessions.
+ */
+static void iscsi_post_login_start_timers(struct iscsi_conn *conn)
+{
+       struct iscsi_session *sess = conn->sess;
+
+       if (!sess->sess_ops->SessionType)
+               iscsit_start_nopin_timer(conn);
+}
+
+/*
+ * Finalize a successful login.  For a non-leading connection (!zero_tsih)
+ * the connection is added to the already-registered session; for a leading
+ * connection the new session is registered with the SE TPG and the session
+ * counters are updated.  In both cases the connection's RX/TX thread set is
+ * activated and its CPU mask established.  Always returns 0.
+ */
+static int iscsi_post_login_handler(
+       struct iscsi_np *np,
+       struct iscsi_conn *conn,
+       u8 zero_tsih)
+{
+       int stop_timer = 0;
+       struct iscsi_session *sess = conn->sess;
+       struct se_session *se_sess = sess->se_sess;
+       struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess);
+       struct se_portal_group *se_tpg = &tpg->tpg_se_tpg;
+       struct iscsi_thread_set *ts;
+
+       iscsit_inc_conn_usage_count(conn);
+
+       iscsit_collect_login_stats(conn, ISCSI_STATUS_CLS_SUCCESS,
+                       ISCSI_LOGIN_STATUS_ACCEPT);
+
+       pr_debug("Moving to TARG_CONN_STATE_LOGGED_IN.\n");
+       conn->conn_state = TARG_CONN_STATE_LOGGED_IN;
+
+       iscsi_set_connection_parameters(conn->conn_ops, conn->param_list);
+       iscsit_set_sync_and_steering_values(conn);
+       /*
+        * SCSI Initiator -> SCSI Target Port Mapping
+        */
+       ts = iscsi_get_thread_set();
+       if (!zero_tsih) {
+               /* Adding a connection to an existing, logged-in session. */
+               iscsi_set_session_parameters(sess->sess_ops,
+                               conn->param_list, 0);
+               iscsi_release_param_list(conn->param_list);
+               conn->param_list = NULL;
+
+               spin_lock_bh(&sess->conn_lock);
+               atomic_set(&sess->session_continuation, 0);
+               if (sess->session_state == TARG_SESS_STATE_FAILED) {
+                       pr_debug("Moving to"
+                                       " TARG_SESS_STATE_LOGGED_IN.\n");
+                       sess->session_state = TARG_SESS_STATE_LOGGED_IN;
+                       stop_timer = 1;
+               }
+
+               pr_debug("iSCSI Login successful on CID: %hu from %s to"
+                       " %s:%hu,%hu\n", conn->cid, conn->login_ip, np->np_ip,
+                               np->np_port, tpg->tpgt);
+
+               list_add_tail(&conn->conn_list, &sess->sess_conn_list);
+               atomic_inc(&sess->nconn);
+               /*
+                * NOTE(review): %hu with an int from atomic_read() relies on
+                * varargs promotion; %d would match the argument type.
+                */
+               pr_debug("Incremented iSCSI Connection count to %hu"
+                       " from node: %s\n", atomic_read(&sess->nconn),
+                       sess->sess_ops->InitiatorName);
+               spin_unlock_bh(&sess->conn_lock);
+
+               iscsi_post_login_start_timers(conn);
+               iscsi_activate_thread_set(conn, ts);
+               /*
+                * Determine CPU mask to ensure connection's RX and TX kthreads
+                * are scheduled on the same CPU.
+                */
+               iscsit_thread_get_cpumask(conn);
+               conn->conn_rx_reset_cpumask = 1;
+               conn->conn_tx_reset_cpumask = 1;
+
+               iscsit_dec_conn_usage_count(conn);
+               /* Time2Retain was only relevant while the session was FAILED. */
+               if (stop_timer) {
+                       spin_lock_bh(&se_tpg->session_lock);
+                       iscsit_stop_time2retain_timer(sess);
+                       spin_unlock_bh(&se_tpg->session_lock);
+               }
+               iscsit_dec_session_usage_count(sess);
+               return 0;
+       }
+
+       /* Leading connection: register the new session with the SE TPG. */
+       iscsi_set_session_parameters(sess->sess_ops, conn->param_list, 1);
+       iscsi_release_param_list(conn->param_list);
+       conn->param_list = NULL;
+
+       iscsit_determine_maxcmdsn(sess);
+
+       spin_lock_bh(&se_tpg->session_lock);
+       __transport_register_session(&sess->tpg->tpg_se_tpg,
+                       se_sess->se_node_acl, se_sess, (void *)sess);
+       pr_debug("Moving to TARG_SESS_STATE_LOGGED_IN.\n");
+       sess->session_state = TARG_SESS_STATE_LOGGED_IN;
+
+       pr_debug("iSCSI Login successful on CID: %hu from %s to %s:%hu,%hu\n",
+               conn->cid, conn->login_ip, np->np_ip, np->np_port, tpg->tpgt);
+
+       spin_lock_bh(&sess->conn_lock);
+       list_add_tail(&conn->conn_list, &sess->sess_conn_list);
+       atomic_inc(&sess->nconn);
+       pr_debug("Incremented iSCSI Connection count to %hu from node:"
+               " %s\n", atomic_read(&sess->nconn),
+               sess->sess_ops->InitiatorName);
+       spin_unlock_bh(&sess->conn_lock);
+
+       /* SID 0 is reserved; skip it on wrap-around, as with TSIH above. */
+       sess->sid = tpg->sid++;
+       if (!sess->sid)
+               sess->sid = tpg->sid++;
+       pr_debug("Established iSCSI session from node: %s\n",
+                       sess->sess_ops->InitiatorName);
+
+       tpg->nsessions++;
+       if (tpg->tpg_tiqn)
+               tpg->tpg_tiqn->tiqn_nsessions++;
+
+       pr_debug("Incremented number of active iSCSI sessions to %u on"
+               " iSCSI Target Portal Group: %hu\n", tpg->nsessions, tpg->tpgt);
+       spin_unlock_bh(&se_tpg->session_lock);
+
+       iscsi_post_login_start_timers(conn);
+       iscsi_activate_thread_set(conn, ts);
+       /*
+        * Determine CPU mask to ensure connection's RX and TX kthreads
+        * are scheduled on the same CPU.
+        */
+       iscsit_thread_get_cpumask(conn);
+       conn->conn_rx_reset_cpumask = 1;
+       conn->conn_tx_reset_cpumask = 1;
+
+       iscsit_dec_conn_usage_count(conn);
+
+       return 0;
+}
+
+/*
+ * Login timer expiry callback: interrupts the np login thread with SIGINT
+ * so a stalled login negotiation is aborted.  Does nothing if a concurrent
+ * iscsi_stop_login_thread_timer() has already set ISCSI_TF_STOP.
+ */
+static void iscsi_handle_login_thread_timeout(unsigned long data)
+{
+       struct iscsi_np *np = (struct iscsi_np *) data;
+
+       spin_lock_bh(&np->np_thread_lock);
+       pr_err("iSCSI Login timeout on Network Portal %s:%hu\n",
+                       np->np_ip, np->np_port);
+
+       if (np->np_login_timer_flags & ISCSI_TF_STOP) {
+               spin_unlock_bh(&np->np_thread_lock);
+               return;
+       }
+
+       if (np->np_thread)
+               send_sig(SIGINT, np->np_thread, 1);
+
+       np->np_login_timer_flags &= ~ISCSI_TF_RUNNING;
+       spin_unlock_bh(&np->np_thread_lock);
+}
+
+/*
+ * Arm the per-portal login timeout timer for TA_LOGIN_TIMEOUT seconds.
+ * Flag state (ISCSI_TF_RUNNING set, ISCSI_TF_STOP cleared) is updated
+ * under np_thread_lock so it is coherent with the expiry handler above.
+ */
+static void iscsi_start_login_thread_timer(struct iscsi_np *np)
+{
+       /*
+        * This used the TA_LOGIN_TIMEOUT constant because at this
+        * point we do not have access to ISCSI_TPG_ATTRIB(tpg)->login_timeout
+        */
+       spin_lock_bh(&np->np_thread_lock);
+       init_timer(&np->np_login_timer);
+       np->np_login_timer.expires = (get_jiffies_64() + TA_LOGIN_TIMEOUT * HZ);
+       np->np_login_timer.data = (unsigned long)np;
+       np->np_login_timer.function = iscsi_handle_login_thread_timeout;
+       np->np_login_timer_flags &= ~ISCSI_TF_STOP;
+       np->np_login_timer_flags |= ISCSI_TF_RUNNING;
+       add_timer(&np->np_login_timer);
+
+       pr_debug("Added timeout timer to iSCSI login request for"
+                       " %u seconds.\n", TA_LOGIN_TIMEOUT);
+       spin_unlock_bh(&np->np_thread_lock);
+}
+
+/*
+ * Cancel the login timeout timer.  ISCSI_TF_STOP is set under the lock
+ * first so a racing expiry handler bails out, then del_timer_sync() is
+ * called outside the lock (the handler also takes np_thread_lock).
+ */
+static void iscsi_stop_login_thread_timer(struct iscsi_np *np)
+{
+       spin_lock_bh(&np->np_thread_lock);
+       if (!(np->np_login_timer_flags & ISCSI_TF_RUNNING)) {
+               spin_unlock_bh(&np->np_thread_lock);
+               return;
+       }
+       np->np_login_timer_flags |= ISCSI_TF_STOP;
+       spin_unlock_bh(&np->np_thread_lock);
+
+       del_timer_sync(&np->np_login_timer);
+
+       spin_lock_bh(&np->np_thread_lock);
+       np->np_login_timer_flags &= ~ISCSI_TF_RUNNING;
+       spin_unlock_bh(&np->np_thread_lock);
+}
+
+/*
+ * Create, configure, bind and listen on the network portal's login socket
+ * for the configured transport (TCP or SCTP over TCP/UDP style sockets).
+ * Returns 0 on success or a negative errno; on failure the socket (and any
+ * SCTP struct file allocated here) is released and np->np_socket is cleared.
+ */
+int iscsi_target_setup_login_socket(
+       struct iscsi_np *np,
+       struct __kernel_sockaddr_storage *sockaddr)
+{
+       struct socket *sock;
+       int backlog = 5, ret, opt = 0, len;
+
+       /* Map the configured transport onto a protocol/socket-type pair. */
+       switch (np->np_network_transport) {
+       case ISCSI_TCP:
+               np->np_ip_proto = IPPROTO_TCP;
+               np->np_sock_type = SOCK_STREAM;
+               break;
+       case ISCSI_SCTP_TCP:
+               np->np_ip_proto = IPPROTO_SCTP;
+               np->np_sock_type = SOCK_STREAM;
+               break;
+       case ISCSI_SCTP_UDP:
+               np->np_ip_proto = IPPROTO_SCTP;
+               np->np_sock_type = SOCK_SEQPACKET;
+               break;
+       case ISCSI_IWARP_TCP:
+       case ISCSI_IWARP_SCTP:
+       case ISCSI_INFINIBAND:
+       default:
+               pr_err("Unsupported network_transport: %d\n",
+                               np->np_network_transport);
+               return -EINVAL;
+       }
+
+       ret = sock_create(sockaddr->ss_family, np->np_sock_type,
+                       np->np_ip_proto, &sock);
+       if (ret < 0) {
+               pr_err("sock_create() failed.\n");
+               return ret;
+       }
+       np->np_socket = sock;
+       /*
+        * The SCTP stack needs struct socket->file.
+        */
+       if ((np->np_network_transport == ISCSI_SCTP_TCP) ||
+           (np->np_network_transport == ISCSI_SCTP_UDP)) {
+               if (!sock->file) {
+                       sock->file = kzalloc(sizeof(struct file), GFP_KERNEL);
+                       if (!sock->file) {
+                               pr_err("Unable to allocate struct"
+                                               " file for SCTP\n");
+                               ret = -ENOMEM;
+                               goto fail;
+                       }
+                       /* Remember we own this file so fail/shutdown frees it. */
+                       np->np_flags |= NPF_SCTP_STRUCT_FILE;
+               }
+       }
+       /*
+        * Setup the np->np_sockaddr from the passed sockaddr setup
+        * in iscsi_target_configfs.c code..
+        */
+       memcpy((void *)&np->np_sockaddr, (void *)sockaddr,
+                       sizeof(struct __kernel_sockaddr_storage));
+
+       if (sockaddr->ss_family == AF_INET6)
+               len = sizeof(struct sockaddr_in6);
+       else
+               len = sizeof(struct sockaddr_in);
+       /*
+        * Set SO_REUSEADDR, and disable Nagle Algorithm with TCP_NODELAY.
+        */
+       opt = 1;
+       if (np->np_network_transport == ISCSI_TCP) {
+               ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_NODELAY,
+                               (char *)&opt, sizeof(opt));
+               if (ret < 0) {
+                       pr_err("kernel_setsockopt() for TCP_NODELAY"
+                               " failed: %d\n", ret);
+                       goto fail;
+               }
+       }
+
+       ret = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
+                       (char *)&opt, sizeof(opt));
+       if (ret < 0) {
+               pr_err("kernel_setsockopt() for SO_REUSEADDR"
+                       " failed\n");
+               goto fail;
+       }
+
+       ret = kernel_bind(sock, (struct sockaddr *)&np->np_sockaddr, len);
+       if (ret < 0) {
+               pr_err("kernel_bind() failed: %d\n", ret);
+               goto fail;
+       }
+
+       ret = kernel_listen(sock, backlog);
+       if (ret != 0) {
+               pr_err("kernel_listen() failed: %d\n", ret);
+               goto fail;
+       }
+
+       return 0;
+
+fail:
+       np->np_socket = NULL;
+       if (sock) {
+               if (np->np_flags & NPF_SCTP_STRUCT_FILE) {
+                       kfree(sock->file);
+                       sock->file = NULL;
+               }
+
+               sock_release(sock);
+       }
+       return ret;
+}
+
+/*
+ * One iteration of the per-portal login thread: accept a connection,
+ * read and validate the initial Login Request PDU, run the zero/non-zero
+ * TSIH login stages and key negotiation, then hand the connection over to
+ * iscsi_post_login_handler().
+ *
+ * Return values: 1 means "loop again and accept another socket",
+ * 0 means the thread should exit (ISCSI_NP_THREAD_EXIT has been set).
+ * All failure paths converge on new_sess_out/old_sess_out, which tear
+ * down the partially built session/connection state.
+ */
+static int __iscsi_target_login_thread(struct iscsi_np *np)
+{
+       u8 buffer[ISCSI_HDR_LEN], iscsi_opcode, zero_tsih = 0;
+       int err, ret = 0, ip_proto, sock_type, set_sctp_conn_flag, stop;
+       struct iscsi_conn *conn = NULL;
+       struct iscsi_login *login;
+       struct iscsi_portal_group *tpg = NULL;
+       struct socket *new_sock, *sock;
+       struct kvec iov;
+       struct iscsi_login_req *pdu;
+       struct sockaddr_in sock_in;
+       struct sockaddr_in6 sock_in6;
+
+       flush_signals(current);
+       set_sctp_conn_flag = 0;
+       sock = np->np_socket;
+       ip_proto = np->np_ip_proto;
+       sock_type = np->np_sock_type;
+
+       spin_lock_bh(&np->np_thread_lock);
+       if (np->np_thread_state == ISCSI_NP_THREAD_RESET) {
+               np->np_thread_state = ISCSI_NP_THREAD_ACTIVE;
+               /* Wake up whoever requested the portal reset. */
+               complete(&np->np_restart_comp);
+       } else {
+               np->np_thread_state = ISCSI_NP_THREAD_ACTIVE;
+       }
+       spin_unlock_bh(&np->np_thread_lock);
+
+       if (kernel_accept(sock, &new_sock, 0) < 0) {
+               spin_lock_bh(&np->np_thread_lock);
+               if (np->np_thread_state == ISCSI_NP_THREAD_RESET) {
+                       spin_unlock_bh(&np->np_thread_lock);
+                       complete(&np->np_restart_comp);
+                       /* Get another socket */
+                       return 1;
+               }
+               spin_unlock_bh(&np->np_thread_lock);
+               goto out;
+       }
+       /*
+        * The SCTP stack needs struct socket->file.
+        */
+       if ((np->np_network_transport == ISCSI_SCTP_TCP) ||
+           (np->np_network_transport == ISCSI_SCTP_UDP)) {
+               if (!new_sock->file) {
+                       new_sock->file = kzalloc(
+                                       sizeof(struct file), GFP_KERNEL);
+                       if (!new_sock->file) {
+                               pr_err("Unable to allocate struct"
+                                               " file for SCTP\n");
+                               sock_release(new_sock);
+                               /* Get another socket */
+                               return 1;
+                       }
+                       set_sctp_conn_flag = 1;
+               }
+       }
+
+       iscsi_start_login_thread_timer(np);
+
+       conn = kzalloc(sizeof(struct iscsi_conn), GFP_KERNEL);
+       if (!conn) {
+               pr_err("Could not allocate memory for"
+                       " new connection\n");
+               if (set_sctp_conn_flag) {
+                       kfree(new_sock->file);
+                       new_sock->file = NULL;
+               }
+               sock_release(new_sock);
+               /* Get another socket */
+               return 1;
+       }
+
+       pr_debug("Moving to TARG_CONN_STATE_FREE.\n");
+       conn->conn_state = TARG_CONN_STATE_FREE;
+       conn->sock = new_sock;
+
+       if (set_sctp_conn_flag)
+               conn->conn_flags |= CONNFLAG_SCTP_STRUCT_FILE;
+
+       pr_debug("Moving to TARG_CONN_STATE_XPT_UP.\n");
+       conn->conn_state = TARG_CONN_STATE_XPT_UP;
+
+       /*
+        * Allocate conn->conn_ops early as a failure calling
+        * iscsit_tx_login_rsp() below will call tx_data().
+        */
+       conn->conn_ops = kzalloc(sizeof(struct iscsi_conn_ops), GFP_KERNEL);
+       if (!conn->conn_ops) {
+               pr_err("Unable to allocate memory for"
+                       " struct iscsi_conn_ops.\n");
+               goto new_sess_out;
+       }
+       /*
+        * Perform the remaining iSCSI connection initialization items..
+        */
+       if (iscsi_login_init_conn(conn) < 0)
+               goto new_sess_out;
+
+       memset(buffer, 0, ISCSI_HDR_LEN);
+       memset(&iov, 0, sizeof(struct kvec));
+       iov.iov_base    = buffer;
+       iov.iov_len     = ISCSI_HDR_LEN;
+
+       if (rx_data(conn, &iov, 1, ISCSI_HDR_LEN) <= 0) {
+               pr_err("rx_data() returned an error.\n");
+               goto new_sess_out;
+       }
+
+       iscsi_opcode = (buffer[0] & ISCSI_OPCODE_MASK);
+       /*
+        * NOTE(review): the bitwise '&' accepts any opcode sharing bits
+        * with ISCSI_OP_LOGIN; '!= ISCSI_OP_LOGIN' would be a strict
+        * check -- confirm intent before changing.
+        */
+       if (!(iscsi_opcode & ISCSI_OP_LOGIN)) {
+               pr_err("First opcode is not login request,"
+                       " failing login request.\n");
+               goto new_sess_out;
+       }
+
+       /* Byte-swap the Login Request header fields in place. */
+       pdu                     = (struct iscsi_login_req *) buffer;
+       pdu->cid                = be16_to_cpu(pdu->cid);
+       pdu->tsih               = be16_to_cpu(pdu->tsih);
+       pdu->itt                = be32_to_cpu(pdu->itt);
+       pdu->cmdsn              = be32_to_cpu(pdu->cmdsn);
+       pdu->exp_statsn         = be32_to_cpu(pdu->exp_statsn);
+       /*
+        * Used by iscsit_tx_login_rsp() for Login Responses PDUs
+        * when Status-Class != 0.
+        */
+       conn->login_itt         = pdu->itt;
+
+       spin_lock_bh(&np->np_thread_lock);
+       if (np->np_thread_state != ISCSI_NP_THREAD_ACTIVE) {
+               spin_unlock_bh(&np->np_thread_lock);
+               pr_err("iSCSI Network Portal on %s:%hu currently not"
+                       " active.\n", np->np_ip, np->np_port);
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                               ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE);
+               goto new_sess_out;
+       }
+       spin_unlock_bh(&np->np_thread_lock);
+
+       /* Record the peer address for logging; IPv6 formatting is stubbed. */
+       if (np->np_sockaddr.ss_family == AF_INET6) {
+               memset(&sock_in6, 0, sizeof(struct sockaddr_in6));
+
+               if (conn->sock->ops->getname(conn->sock,
+                               (struct sockaddr *)&sock_in6, &err, 1) < 0) {
+                       pr_err("sock_ops->getname() failed.\n");
+                       iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                                       ISCSI_LOGIN_STATUS_TARGET_ERROR);
+                       goto new_sess_out;
+               }
+#if 0
+               if (!iscsi_ntop6((const unsigned char *)
+                               &sock_in6.sin6_addr.in6_u,
+                               (char *)&conn->ipv6_login_ip[0],
+                               IPV6_ADDRESS_SPACE)) {
+                       pr_err("iscsi_ntop6() failed\n");
+                       iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                                       ISCSI_LOGIN_STATUS_TARGET_ERROR);
+                       goto new_sess_out;
+               }
+#else
+               pr_debug("Skipping iscsi_ntop6()\n");
+#endif
+       } else {
+               memset(&sock_in, 0, sizeof(struct sockaddr_in));
+
+               if (conn->sock->ops->getname(conn->sock,
+                               (struct sockaddr *)&sock_in, &err, 1) < 0) {
+                       pr_err("sock_ops->getname() failed.\n");
+                       iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                                       ISCSI_LOGIN_STATUS_TARGET_ERROR);
+                       goto new_sess_out;
+               }
+               sprintf(conn->login_ip, "%pI4", &sock_in.sin_addr.s_addr);
+               conn->login_port = ntohs(sock_in.sin_port);
+       }
+
+       conn->network_transport = np->np_network_transport;
+
+       pr_debug("Received iSCSI login request from %s on %s Network"
+                       " Portal %s:%hu\n", conn->login_ip,
+               (conn->network_transport == ISCSI_TCP) ? "TCP" : "SCTP",
+                       np->np_ip, np->np_port);
+
+       pr_debug("Moving to TARG_CONN_STATE_IN_LOGIN.\n");
+       conn->conn_state        = TARG_CONN_STATE_IN_LOGIN;
+
+       if (iscsi_login_check_initiator_version(conn, pdu->max_version,
+                       pdu->min_version) < 0)
+               goto new_sess_out;
+
+       zero_tsih = (pdu->tsih == 0x0000);
+       if ((zero_tsih)) {
+               /*
+                * This is the leading connection of a new session.
+                * We wait until after authentication to check for
+                * session reinstatement.
+                */
+               if (iscsi_login_zero_tsih_s1(conn, buffer) < 0)
+                       goto new_sess_out;
+       } else {
+               /*
+                * Add a new connection to an existing session.
+                * We check for a non-existent session in
+                * iscsi_login_non_zero_tsih_s2() below based
+                * on ISID/TSIH, but wait until after authentication
+                * to check for connection reinstatement, etc.
+                */
+               if (iscsi_login_non_zero_tsih_s1(conn, buffer) < 0)
+                       goto new_sess_out;
+       }
+
+       /*
+        * This will process the first login request, and call
+        * iscsi_target_locate_portal(), and return a valid struct iscsi_login.
+        */
+       login = iscsi_target_init_negotiation(np, conn, buffer);
+       if (!login) {
+               tpg = conn->tpg;
+               goto new_sess_out;
+       }
+
+       tpg = conn->tpg;
+       if (!tpg) {
+               pr_err("Unable to locate struct iscsi_conn->tpg\n");
+               goto new_sess_out;
+       }
+
+       if (zero_tsih) {
+               if (iscsi_login_zero_tsih_s2(conn) < 0) {
+                       iscsi_target_nego_release(login, conn);
+                       goto new_sess_out;
+               }
+       } else {
+               if (iscsi_login_non_zero_tsih_s2(conn, buffer) < 0) {
+                       iscsi_target_nego_release(login, conn);
+                       goto old_sess_out;
+               }
+       }
+
+       if (iscsi_target_start_negotiation(login, conn) < 0)
+               goto new_sess_out;
+
+       if (!conn->sess) {
+               pr_err("struct iscsi_conn session pointer is NULL!\n");
+               goto new_sess_out;
+       }
+
+       iscsi_stop_login_thread_timer(np);
+
+       if (signal_pending(current))
+               goto new_sess_out;
+
+       ret = iscsi_post_login_handler(np, conn, zero_tsih);
+
+       if (ret < 0)
+               goto new_sess_out;
+
+       iscsit_deaccess_np(np, tpg);
+       tpg = NULL;
+       /* Get another socket */
+       return 1;
+
+new_sess_out:
+       pr_err("iSCSI Login negotiation failed.\n");
+       iscsit_collect_login_stats(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+                                 ISCSI_LOGIN_STATUS_INIT_ERR);
+       if (!zero_tsih || !conn->sess)
+               goto old_sess_out;
+       /* Leading-connection failure: free the half-built session state. */
+       if (conn->sess->se_sess)
+               transport_free_session(conn->sess->se_sess);
+       if (conn->sess->session_index != 0) {
+               spin_lock_bh(&sess_idr_lock);
+               idr_remove(&sess_idr, conn->sess->session_index);
+               spin_unlock_bh(&sess_idr_lock);
+       }
+       /* NOTE(review): kfree(NULL) is a no-op; these NULL checks are redundant. */
+       if (conn->sess->sess_ops)
+               kfree(conn->sess->sess_ops);
+       if (conn->sess)
+               kfree(conn->sess);
+old_sess_out:
+       iscsi_stop_login_thread_timer(np);
+       /*
+        * If login negotiation fails check if the Time2Retain timer
+        * needs to be restarted.
+        */
+       if (!zero_tsih && conn->sess) {
+               spin_lock_bh(&conn->sess->conn_lock);
+               if (conn->sess->session_state == TARG_SESS_STATE_FAILED) {
+                       struct se_portal_group *se_tpg =
+                                       &ISCSI_TPG_C(conn)->tpg_se_tpg;
+
+                       atomic_set(&conn->sess->session_continuation, 0);
+                       spin_unlock_bh(&conn->sess->conn_lock);
+                       spin_lock_bh(&se_tpg->session_lock);
+                       iscsit_start_time2retain_handler(conn->sess);
+                       spin_unlock_bh(&se_tpg->session_lock);
+               } else
+                       spin_unlock_bh(&conn->sess->conn_lock);
+               iscsit_dec_session_usage_count(conn->sess);
+       }
+
+       /* Release per-connection crypto, cpumask, ops and socket resources. */
+       if (!IS_ERR(conn->conn_rx_hash.tfm))
+               crypto_free_hash(conn->conn_rx_hash.tfm);
+       if (!IS_ERR(conn->conn_tx_hash.tfm))
+               crypto_free_hash(conn->conn_tx_hash.tfm);
+
+       if (conn->conn_cpumask)
+               free_cpumask_var(conn->conn_cpumask);
+
+       kfree(conn->conn_ops);
+
+       if (conn->param_list) {
+               iscsi_release_param_list(conn->param_list);
+               conn->param_list = NULL;
+       }
+       if (conn->sock) {
+               if (conn->conn_flags & CONNFLAG_SCTP_STRUCT_FILE) {
+                       kfree(conn->sock->file);
+                       conn->sock->file = NULL;
+               }
+               sock_release(conn->sock);
+       }
+       kfree(conn);
+
+       if (tpg) {
+               iscsit_deaccess_np(np, tpg);
+               tpg = NULL;
+       }
+
+out:
+       stop = kthread_should_stop();
+       if (!stop && signal_pending(current)) {
+               /* SIGINT may mean shutdown rather than just a login timeout. */
+               spin_lock_bh(&np->np_thread_lock);
+               stop = (np->np_thread_state == ISCSI_NP_THREAD_SHUTDOWN);
+               spin_unlock_bh(&np->np_thread_lock);
+       }
+       /* Wait for another socket.. */
+       if (!stop)
+               return 1;
+
+       iscsi_stop_login_thread_timer(np);
+       spin_lock_bh(&np->np_thread_lock);
+       np->np_thread_state = ISCSI_NP_THREAD_EXIT;
+       spin_unlock_bh(&np->np_thread_lock);
+       return 0;
+}
+
+/*
+ * kthread entry point for a network portal's login thread.  SIGINT is
+ * allowed so the login timeout handler and shutdown paths can interrupt
+ * a blocked accept/negotiation; the loop runs until __iscsi_target_login_thread()
+ * signals exit (returns != 1) or the kthread is asked to stop.
+ */
+int iscsi_target_login_thread(void *arg)
+{
+       struct iscsi_np *np = (struct iscsi_np *)arg;
+       int ret;
+
+       allow_signal(SIGINT);
+
+       while (!kthread_should_stop()) {
+               ret = __iscsi_target_login_thread(np);
+               /*
+                * We break and exit here unless another sock_accept() call
+                * is expected.
+                */
+               if (ret != 1)
+                       break;
+       }
+
+       return 0;
+}
diff --git a/drivers/target/iscsi/iscsi_target_login.h b/drivers/target/iscsi/iscsi_target_login.h
new file mode 100644 (file)
index 0000000..091dcae
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef ISCSI_TARGET_LOGIN_H
+#define ISCSI_TARGET_LOGIN_H
+
+extern int iscsi_login_setup_crypto(struct iscsi_conn *);
+extern int iscsi_check_for_session_reinstatement(struct iscsi_conn *);
+extern int iscsi_login_post_auth_non_zero_tsih(struct iscsi_conn *, u16, u32);
+extern int iscsi_target_setup_login_socket(struct iscsi_np *,
+                               struct __kernel_sockaddr_storage *);
+extern int iscsi_target_login_thread(void *);
+extern int iscsi_login_disable_FIM_keys(struct iscsi_param_list *, struct iscsi_conn *);
+
+#endif   /*** ISCSI_TARGET_LOGIN_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
new file mode 100644 (file)
index 0000000..713a4d2
--- /dev/null
@@ -0,0 +1,1067 @@
+/*******************************************************************************
+ * This file contains main functions related to iSCSI Parameter negotiation.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/ctype.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_tpg.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_parameters.h"
+#include "iscsi_target_login.h"
+#include "iscsi_target_nego.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+#include "iscsi_target_auth.h"
+
+#define MAX_LOGIN_PDUS  7
+#define TEXT_LEN       4096
+
+/*
+ * Replace every NUL byte within the first 'len' bytes of 'buf' with a
+ * semicolon, turning NUL-separated key=value pairs into one
+ * ';'-separated string.
+ */
+void convert_null_to_semi(char *buf, int len)
+{
+       char *p = buf;
+       char *end = buf + len;
+
+       while (p < end) {
+               if (*p == '\0')
+                       *p = ';';
+               p++;
+       }
+}
+
+/*
+ * Return the offset of the first ';' in the NUL-terminated string
+ * 'buf', or -1 if the string ends before any ';' is found.
+ */
+int strlen_semi(char *buf)
+{
+       char *p;
+
+       for (p = buf; *p != '\0'; p++) {
+               if (*p == ';')
+                       return (int)(p - buf);
+       }
+
+       return -1;
+}
+
+/*
+ * Extract the value that follows "<pattern>...=" from 'in_buf' into
+ * 'out_buf', stopping at the next ';' separator.  A leading "0x"/"0X"
+ * is stripped and *type is set to HEX, otherwise DECIMAL.
+ *
+ * Returns 0 on success, -1 on a NULL argument, missing pattern, '='
+ * or ';' terminator, or when the value would overflow 'out_buf'.
+ */
+int extract_param(
+       const char *in_buf,
+       const char *pattern,
+       unsigned int max_length,
+       char *out_buf,
+       unsigned char *type)
+{
+       char *ptr;
+       int len;
+
+       if (!in_buf || !pattern || !out_buf || !type)
+               return -1;
+
+       ptr = strstr(in_buf, pattern);
+       if (!ptr)
+               return -1;
+
+       ptr = strstr(ptr, "=");
+       if (!ptr)
+               return -1;
+
+       ptr += 1;
+       if (*ptr == '0' && (*(ptr+1) == 'x' || *(ptr+1) == 'X')) {
+               ptr += 2; /* skip 0x */
+               *type = HEX;
+       } else
+               *type = DECIMAL;
+
+       len = strlen_semi(ptr);
+       if (len < 0)
+               return -1;
+
+       /*
+        * out_buf[len] below stores the terminating NUL, so the value
+        * must be strictly shorter than max_length.  The original '>'
+        * comparison was an off-by-one that allowed a one-byte write
+        * past the end of out_buf when len == max_length.
+        */
+       if (len >= max_length) {
+               pr_err("Length of input: %d exceeds max_length:"
+                       " %u\n", len, max_length);
+               return -1;
+       }
+       memcpy(out_buf, ptr, len);
+       out_buf[len] = '\0';
+
+       return 0;
+}
+
+/*
+ * Run the authentication step of the Security Negotiation login phase,
+ * dispatching on the negotiated AuthMethod value in 'authtype'.
+ *
+ * Returns 0 to continue the auth exchange, 1 when no authentication is
+ * required (None), 2 on an unsupported method or failure, and -1 on
+ * internal error.  NOTE(review): the return type is u32 yet -1 is
+ * returned on error; callers store the result in a signed int so the
+ * comparison still works — confirm before changing the type.
+ */
+static u32 iscsi_handle_authentication(
+       struct iscsi_conn *conn,
+       char *in_buf,
+       char *out_buf,
+       int in_length,
+       int *out_length,
+       unsigned char *authtype)
+{
+       struct iscsi_session *sess = conn->sess;
+       struct iscsi_node_auth *auth;
+       struct iscsi_node_acl *iscsi_nacl;
+       struct se_node_acl *se_nacl;
+
+       if (!sess->sess_ops->SessionType) {
+               /*
+                * For SessionType=Normal
+                */
+               se_nacl = conn->sess->se_sess->se_node_acl;
+               if (!se_nacl) {
+                       pr_err("Unable to locate struct se_node_acl for"
+                                       " CHAP auth\n");
+                       return -1;
+               }
+               iscsi_nacl = container_of(se_nacl, struct iscsi_node_acl,
+                               se_node_acl);
+               if (!iscsi_nacl) {
+                       pr_err("Unable to locate struct iscsi_node_acl for"
+                                       " CHAP auth\n");
+                       return -1;
+               }
+
+               auth = ISCSI_NODE_AUTH(iscsi_nacl);
+       } else {
+               /*
+                * For SessionType=Discovery
+                */
+               auth = &iscsit_global->discovery_acl.node_auth;
+       }
+
+       /*
+        * NOTE(review): these strstr() calls pass the known method name
+        * as the haystack and the initiator-supplied value as the
+        * needle, so they match only when 'authtype' is a substring of
+        * e.g. "CHAP" — verify this argument order is intentional.
+        */
+       if (strstr("CHAP", authtype))
+               strcpy(conn->sess->auth_type, "CHAP");
+       else
+               strcpy(conn->sess->auth_type, NONE);
+
+       if (strstr("None", authtype))
+               return 1;
+#ifdef CANSRP
+       else if (strstr("SRP", authtype))
+               return srp_main_loop(conn, auth, in_buf, out_buf,
+                               &in_length, out_length);
+#endif
+       else if (strstr("CHAP", authtype))
+               return chap_main_loop(conn, auth, in_buf, out_buf,
+                               &in_length, out_length);
+       else if (strstr("SPKM1", authtype))
+               return 2;
+       else if (strstr("SPKM2", authtype))
+               return 2;
+       else if (strstr("KRB5", authtype))
+               return 2;
+       else
+               return 2;
+}
+
+/*
+ * Release the per-connection auth protocol state after a failed
+ * authentication attempt.  NOTE(review): conn->auth_protocol is not
+ * NULLed here — confirm no later path can reference the freed pointer.
+ */
+static void iscsi_remove_failed_auth_entry(struct iscsi_conn *conn)
+{
+       kfree(conn->auth_protocol);
+}
+
+/*
+ * Validate an incoming Login Request PDU against the state recorded
+ * from earlier PDUs of the same login: opcode, CONTINUE/TRANSIT flag
+ * combinations, CSG/NSG stage transitions, version range, ISID, ITT
+ * and payload length must all stay consistent across the sequence.
+ *
+ * Returns 0 when the PDU is acceptable, or -1 (after sending a login
+ * reject where applicable) on any protocol violation.
+ */
+static int iscsi_target_check_login_request(
+       struct iscsi_conn *conn,
+       struct iscsi_login *login)
+{
+       int req_csg, req_nsg, rsp_csg, rsp_nsg;
+       u32 payload_length;
+       struct iscsi_login_req *login_req;
+       struct iscsi_login_rsp *login_rsp;
+
+       login_req = (struct iscsi_login_req *) login->req;
+       login_rsp = (struct iscsi_login_rsp *) login->rsp;
+       payload_length = ntoh24(login_req->dlength);
+
+       switch (login_req->opcode & ISCSI_OPCODE_MASK) {
+       case ISCSI_OP_LOGIN:
+               break;
+       default:
+               pr_err("Received unknown opcode 0x%02x.\n",
+                               login_req->opcode & ISCSI_OPCODE_MASK);
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+                               ISCSI_LOGIN_STATUS_INIT_ERR);
+               return -1;
+       }
+
+       /* CONTINUE and TRANSIT are mutually exclusive per RFC-3720. */
+       if ((login_req->flags & ISCSI_FLAG_LOGIN_CONTINUE) &&
+           (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT)) {
+               pr_err("Login request has both ISCSI_FLAG_LOGIN_CONTINUE"
+                       " and ISCSI_FLAG_LOGIN_TRANSIT set, protocol error.\n");
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+                               ISCSI_LOGIN_STATUS_INIT_ERR);
+               return -1;
+       }
+
+       req_csg = (login_req->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2;
+       rsp_csg = (login_rsp->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2;
+       req_nsg = (login_req->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK);
+       rsp_nsg = (login_rsp->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK);
+
+       if (req_csg != login->current_stage) {
+               pr_err("Initiator unexpectedly changed login stage"
+                       " from %d to %d, login failed.\n", login->current_stage,
+                       req_csg);
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+                               ISCSI_LOGIN_STATUS_INIT_ERR);
+               return -1;
+       }
+
+       /* Stage 2 is reserved, and NSG must advance past CSG on transit. */
+       if ((req_nsg == 2) || (req_csg >= 2) ||
+          ((login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT) &&
+           (req_nsg <= req_csg))) {
+               pr_err("Illegal login_req->flags Combination, CSG: %d,"
+                       " NSG: %d, ISCSI_FLAG_LOGIN_TRANSIT: %d.\n", req_csg,
+                       req_nsg, (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT));
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+                               ISCSI_LOGIN_STATUS_INIT_ERR);
+               return -1;
+       }
+
+       if ((login_req->max_version != login->version_max) ||
+           (login_req->min_version != login->version_min)) {
+               /* Message typo fixed: was "Version Max/Nin". */
+               pr_err("Login request changed Version Max/Min"
+                       " unexpectedly to 0x%02x/0x%02x, protocol error\n",
+                       login_req->max_version, login_req->min_version);
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+                               ISCSI_LOGIN_STATUS_INIT_ERR);
+               return -1;
+       }
+
+       if (memcmp(login_req->isid, login->isid, 6) != 0) {
+               pr_err("Login request changed ISID unexpectedly,"
+                               " protocol error.\n");
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+                               ISCSI_LOGIN_STATUS_INIT_ERR);
+               return -1;
+       }
+
+       if (login_req->itt != login->init_task_tag) {
+               pr_err("Login request changed ITT unexpectedly to"
+                       " 0x%08x, protocol error.\n", login_req->itt);
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+                               ISCSI_LOGIN_STATUS_INIT_ERR);
+               return -1;
+       }
+
+       if (payload_length > MAX_KEY_VALUE_PAIRS) {
+               pr_err("Login request payload exceeds default"
+                       " MaxRecvDataSegmentLength: %u, protocol error.\n",
+                               MAX_KEY_VALUE_PAIRS);
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Enforce keys that must appear in the *first* Login Request:
+ * SessionType must be declared up front, and InitiatorName must be
+ * present on a leading connection (and must match the existing
+ * session's node ACL on a non-leading connection).
+ *
+ * Returns 0 on success, -1 (after sending a login reject) otherwise.
+ */
+static int iscsi_target_check_first_request(
+       struct iscsi_conn *conn,
+       struct iscsi_login *login)
+{
+       struct iscsi_param *param = NULL;
+       struct se_node_acl *se_nacl;
+
+       login->first_request = 0;
+
+       list_for_each_entry(param, &conn->param_list->param_list, p_list) {
+               if (!strncmp(param->name, SESSIONTYPE, 11)) {
+                       if (!IS_PSTATE_ACCEPTOR(param)) {
+                               pr_err("SessionType key not received"
+                                       " in first login request.\n");
+                               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+                                       ISCSI_LOGIN_STATUS_MISSING_FIELDS);
+                               return -1;
+                       }
+                       /* Discovery sessions need no InitiatorName check. */
+                       if (!strncmp(param->value, DISCOVERY, 9))
+                               return 0;
+               }
+
+               if (!strncmp(param->name, INITIATORNAME, 13)) {
+                       if (!IS_PSTATE_ACCEPTOR(param)) {
+                               if (!login->leading_connection)
+                                       continue;
+
+                               pr_err("InitiatorName key not received"
+                                       " in first login request.\n");
+                               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+                                       ISCSI_LOGIN_STATUS_MISSING_FIELDS);
+                               return -1;
+                       }
+
+                       /*
+                        * For non-leading connections, double check that the
+                        * received InitiatorName matches the existing session's
+                        * struct iscsi_node_acl.
+                        */
+                       if (!login->leading_connection) {
+                               se_nacl = conn->sess->se_sess->se_node_acl;
+                               if (!se_nacl) {
+                                       pr_err("Unable to locate"
+                                               " struct se_node_acl\n");
+                                       iscsit_tx_login_rsp(conn,
+                                                       ISCSI_STATUS_CLS_INITIATOR_ERR,
+                                                       ISCSI_LOGIN_STATUS_TGT_NOT_FOUND);
+                                       return -1;
+                               }
+
+                               if (strcmp(param->value,
+                                               se_nacl->initiatorname)) {
+                                       pr_err("Incorrect"
+                                               " InitiatorName: %s for this"
+                                               " iSCSI Initiator Node.\n",
+                                               param->value);
+                                       iscsit_tx_login_rsp(conn,
+                                                       ISCSI_STATUS_CLS_INITIATOR_ERR,
+                                                       ISCSI_LOGIN_STATUS_TGT_NOT_FOUND);
+                                       return -1;
+                               }
+                       }
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Build and transmit the current Login Response PDU, including any
+ * text-key payload in login->rsp_buf (padded to a 4-byte boundary).
+ * After a successful send, the header fields are converted back to
+ * CPU byte order so the cached response state stays usable.
+ *
+ * Returns 0 on success, -1 on transmit failure.
+ */
+static int iscsi_target_do_tx_login_io(struct iscsi_conn *conn, struct iscsi_login *login)
+{
+       u32 padding = 0;
+       struct iscsi_session *sess = conn->sess;
+       struct iscsi_login_rsp *login_rsp;
+
+       login_rsp = (struct iscsi_login_rsp *) login->rsp;
+
+       login_rsp->opcode               = ISCSI_OP_LOGIN_RSP;
+       hton24(login_rsp->dlength, login->rsp_length);
+       memcpy(login_rsp->isid, login->isid, 6);
+       login_rsp->tsih                 = cpu_to_be16(login->tsih);
+       login_rsp->itt                  = cpu_to_be32(login->init_task_tag);
+       login_rsp->statsn               = cpu_to_be32(conn->stat_sn++);
+       login_rsp->exp_cmdsn            = cpu_to_be32(conn->sess->exp_cmd_sn);
+       login_rsp->max_cmdsn            = cpu_to_be32(conn->sess->max_cmd_sn);
+
+       pr_debug("Sending Login Response, Flags: 0x%02x, ITT: 0x%08x,"
+               " ExpCmdSN; 0x%08x, MaxCmdSN: 0x%08x, StatSN: 0x%08x, Length:"
+               " %u\n", login_rsp->flags, ntohl(login_rsp->itt),
+               ntohl(login_rsp->exp_cmdsn), ntohl(login_rsp->max_cmdsn),
+               ntohl(login_rsp->statsn), login->rsp_length);
+
+       /* Round the payload up to the next 4-byte boundary. */
+       padding = ((-login->rsp_length) & 3);
+
+       if (iscsi_login_tx_data(
+                       conn,
+                       login->rsp,
+                       login->rsp_buf,
+                       login->rsp_length + padding) < 0)
+               return -1;
+
+       login->rsp_length               = 0;
+       login_rsp->tsih                 = be16_to_cpu(login_rsp->tsih);
+       login_rsp->itt                  = be32_to_cpu(login_rsp->itt);
+       login_rsp->statsn               = be32_to_cpu(login_rsp->statsn);
+       /* Serialize against CmdSN updates while re-reading sequence numbers. */
+       mutex_lock(&sess->cmdsn_mutex);
+       login_rsp->exp_cmdsn            = be32_to_cpu(sess->exp_cmd_sn);
+       login_rsp->max_cmdsn            = be32_to_cpu(sess->max_cmd_sn);
+       mutex_unlock(&sess->cmdsn_mutex);
+
+       return 0;
+}
+
+/*
+ * Receive the next Login Request PDU: read the fixed-size header,
+ * convert it to CPU byte order, validate it against the login state,
+ * then read the (4-byte padded) text-key payload into login->req_buf.
+ *
+ * Returns 0 on success, -1 on receive or validation failure.
+ */
+static int iscsi_target_do_rx_login_io(struct iscsi_conn *conn, struct iscsi_login *login)
+{
+       u32 padding = 0, payload_length;
+       struct iscsi_login_req *login_req;
+
+       if (iscsi_login_rx_data(conn, login->req, ISCSI_HDR_LEN) < 0)
+               return -1;
+
+       login_req = (struct iscsi_login_req *) login->req;
+       payload_length                  = ntoh24(login_req->dlength);
+       login_req->tsih                 = be16_to_cpu(login_req->tsih);
+       login_req->itt                  = be32_to_cpu(login_req->itt);
+       login_req->cid                  = be16_to_cpu(login_req->cid);
+       login_req->cmdsn                = be32_to_cpu(login_req->cmdsn);
+       login_req->exp_statsn           = be32_to_cpu(login_req->exp_statsn);
+
+       pr_debug("Got Login Command, Flags 0x%02x, ITT: 0x%08x,"
+               " CmdSN: 0x%08x, ExpStatSN: 0x%08x, CID: %hu, Length: %u\n",
+                login_req->flags, login_req->itt, login_req->cmdsn,
+                login_req->exp_statsn, login_req->cid, payload_length);
+
+       if (iscsi_target_check_login_request(conn, login) < 0)
+               return -1;
+
+       /* Payload is padded on the wire to a 4-byte boundary. */
+       padding = ((-payload_length) & 3);
+       memset(login->req_buf, 0, MAX_KEY_VALUE_PAIRS);
+
+       if (iscsi_login_rx_data(
+                       conn,
+                       login->req_buf,
+                       payload_length + padding) < 0)
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Perform one full login exchange: transmit the pending Login
+ * Response, then receive and validate the next Login Request.
+ */
+static int iscsi_target_do_login_io(struct iscsi_conn *conn, struct iscsi_login *login)
+{
+       if (iscsi_target_do_tx_login_io(conn, login) < 0 ||
+           iscsi_target_do_rx_login_io(conn, login) < 0)
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Read the text-key payload of the very first Login Request; the PDU
+ * header itself was already received by the caller into login->req.
+ *
+ * Returns 0 on success, -1 on validation or receive failure.
+ */
+static int iscsi_target_get_initial_payload(
+       struct iscsi_conn *conn,
+       struct iscsi_login *login)
+{
+       u32 padding = 0, payload_length;
+       struct iscsi_login_req *login_req;
+
+       login_req = (struct iscsi_login_req *) login->req;
+       payload_length = ntoh24(login_req->dlength);
+
+       pr_debug("Got Login Command, Flags 0x%02x, ITT: 0x%08x,"
+               " CmdSN: 0x%08x, ExpStatSN: 0x%08x, Length: %u\n",
+               login_req->flags, login_req->itt, login_req->cmdsn,
+               login_req->exp_statsn, payload_length);
+
+       if (iscsi_target_check_login_request(conn, login) < 0)
+               return -1;
+
+       /* Payload is padded on the wire to a 4-byte boundary. */
+       padding = ((-payload_length) & 3);
+
+       if (iscsi_login_rx_data(
+                       conn,
+                       login->req_buf,
+                       payload_length + padding) < 0)
+               return -1;
+
+       return 0;
+}
+
+/*
+ *     NOTE: We check for existing sessions or connections AFTER the initiator
+ *     has been successfully authenticated in order to protect against faked
+ *     ISID/TSIH combinations.
+ */
+/*
+ * Once per login (guarded by login->checked_for_existing), look for a
+ * session or connection that this login reinstates: TSIH 0 means a
+ * possible session reinstatement, a non-zero TSIH means the initiator
+ * is adding/reinstating a connection in an existing session.
+ */
+static int iscsi_target_check_for_existing_instances(
+       struct iscsi_conn *conn,
+       struct iscsi_login *login)
+{
+       if (login->checked_for_existing)
+               return 0;
+
+       login->checked_for_existing = 1;
+
+       if (login->tsih)
+               return iscsi_login_post_auth_non_zero_tsih(conn, login->cid,
+                               login->initial_exp_statsn);
+
+       return iscsi_check_for_session_reinstatement(conn);
+}
+
+/*
+ * Drive one round of security-phase authentication for the current
+ * Login Request, using the negotiated AuthMethod key.  On completion
+ * of authentication, honor a requested transit to stage 1 and check
+ * for existing session/connection instances.
+ *
+ * Returns 0 to continue negotiation, -1 on failure (a login reject is
+ * transmitted first).
+ */
+static int iscsi_target_do_authentication(
+       struct iscsi_conn *conn,
+       struct iscsi_login *login)
+{
+       int authret;
+       u32 payload_length;
+       struct iscsi_param *param;
+       struct iscsi_login_req *login_req;
+       struct iscsi_login_rsp *login_rsp;
+
+       login_req = (struct iscsi_login_req *) login->req;
+       login_rsp = (struct iscsi_login_rsp *) login->rsp;
+       payload_length = ntoh24(login_req->dlength);
+
+       param = iscsi_find_param_from_key(AUTHMETHOD, conn->param_list);
+       if (!param)
+               return -1;
+
+       authret = iscsi_handle_authentication(
+                       conn,
+                       login->req_buf,
+                       login->rsp_buf,
+                       payload_length,
+                       &login->rsp_length,
+                       param->value);
+       switch (authret) {
+       case 0:
+               pr_debug("Received OK response"
+               " from LIO Authentication, continuing.\n");
+               break;
+       case 1:
+               /* Message typo fixed: was "sucessfully". */
+               pr_debug("iSCSI security negotiation"
+                       " completed successfully.\n");
+               login->auth_complete = 1;
+               if ((login_req->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE1) &&
+                   (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT)) {
+                       login_rsp->flags |= (ISCSI_FLAG_LOGIN_NEXT_STAGE1 |
+                                            ISCSI_FLAG_LOGIN_TRANSIT);
+                       login->current_stage = 1;
+               }
+               return iscsi_target_check_for_existing_instances(
+                               conn, login);
+       case 2:
+               pr_err("Security negotiation"
+                       " failed.\n");
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+                               ISCSI_LOGIN_STATUS_AUTH_FAILED);
+               return -1;
+       default:
+               pr_err("Received unknown error %d from LIO"
+                               " Authentication\n", authret);
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                               ISCSI_LOGIN_STATUS_TARGET_ERROR);
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Handle a Login Request whose Current Stage is 0 (Security
+ * Negotiation): decode the initiator's text keys, run authentication
+ * while security keys are still being exchanged, otherwise encode the
+ * target's response keys and decide whether to transit to the
+ * Operational Negotiation stage.
+ *
+ * Returns 0 to continue the login, -1 on failure.
+ */
+static int iscsi_target_handle_csg_zero(
+       struct iscsi_conn *conn,
+       struct iscsi_login *login)
+{
+       int ret;
+       u32 payload_length;
+       struct iscsi_param *param;
+       struct iscsi_login_req *login_req;
+       struct iscsi_login_rsp *login_rsp;
+
+       login_req = (struct iscsi_login_req *) login->req;
+       login_rsp = (struct iscsi_login_rsp *) login->rsp;
+       payload_length = ntoh24(login_req->dlength);
+
+       param = iscsi_find_param_from_key(AUTHMETHOD, conn->param_list);
+       if (!param)
+               return -1;
+
+       ret = iscsi_decode_text_input(
+                       PHASE_SECURITY|PHASE_DECLARATIVE,
+                       SENDER_INITIATOR|SENDER_RECEIVER,
+                       login->req_buf,
+                       payload_length,
+                       conn->param_list);
+       if (ret < 0)
+               return -1;
+
+       /* ret > 0: security-phase keys were received and need auth work. */
+       if (ret > 0) {
+               if (login->auth_complete) {
+                       pr_err("Initiator has already been"
+                               " successfully authenticated, but is still"
+                               " sending %s keys.\n", param->value);
+                       iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+                                       ISCSI_LOGIN_STATUS_INIT_ERR);
+                       return -1;
+               }
+
+               goto do_auth;
+       }
+
+       if (login->first_request)
+               if (iscsi_target_check_first_request(conn, login) < 0)
+                       return -1;
+
+       ret = iscsi_encode_text_output(
+                       PHASE_SECURITY|PHASE_DECLARATIVE,
+                       SENDER_TARGET,
+                       login->rsp_buf,
+                       &login->rsp_length,
+                       conn->param_list);
+       if (ret < 0)
+               return -1;
+
+       if (!iscsi_check_negotiated_keys(conn->param_list)) {
+               /* Reject AuthMethod=None when the TPG mandates authentication. */
+               if (ISCSI_TPG_ATTRIB(ISCSI_TPG_C(conn))->authentication &&
+                   !strncmp(param->value, NONE, 4)) {
+                       pr_err("Initiator sent AuthMethod=None but"
+                               " Target is enforcing iSCSI Authentication,"
+                                       " login failed.\n");
+                       iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+                                       ISCSI_LOGIN_STATUS_AUTH_FAILED);
+                       return -1;
+               }
+
+               if (ISCSI_TPG_ATTRIB(ISCSI_TPG_C(conn))->authentication &&
+                   !login->auth_complete)
+                       return 0;
+
+               if (strncmp(param->value, NONE, 4) && !login->auth_complete)
+                       return 0;
+
+               if ((login_req->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE1) &&
+                   (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT)) {
+                       login_rsp->flags |= ISCSI_FLAG_LOGIN_NEXT_STAGE1 |
+                                           ISCSI_FLAG_LOGIN_TRANSIT;
+                       login->current_stage = 1;
+               }
+       }
+
+       return 0;
+do_auth:
+       return iscsi_target_do_authentication(conn, login);
+}
+
+/*
+ * Handle a Login Request whose Current Stage is 1 (Operational
+ * Negotiation): decode the initiator's operational keys, verify the
+ * authentication policy was satisfied in stage 0, then encode the
+ * target's response keys and allow transit to Full Feature Phase.
+ *
+ * Returns 0 to continue the login, -1 on failure.
+ */
+static int iscsi_target_handle_csg_one(struct iscsi_conn *conn, struct iscsi_login *login)
+{
+       int ret;
+       u32 payload_length;
+       struct iscsi_login_req *login_req;
+       struct iscsi_login_rsp *login_rsp;
+
+       login_req = (struct iscsi_login_req *) login->req;
+       login_rsp = (struct iscsi_login_rsp *) login->rsp;
+       payload_length = ntoh24(login_req->dlength);
+
+       ret = iscsi_decode_text_input(
+                       PHASE_OPERATIONAL|PHASE_DECLARATIVE,
+                       SENDER_INITIATOR|SENDER_RECEIVER,
+                       login->req_buf,
+                       payload_length,
+                       conn->param_list);
+       if (ret < 0)
+               return -1;
+
+       if (login->first_request)
+               if (iscsi_target_check_first_request(conn, login) < 0)
+                       return -1;
+
+       if (iscsi_target_check_for_existing_instances(conn, login) < 0)
+               return -1;
+
+       ret = iscsi_encode_text_output(
+                       PHASE_OPERATIONAL|PHASE_DECLARATIVE,
+                       SENDER_TARGET,
+                       login->rsp_buf,
+                       &login->rsp_length,
+                       conn->param_list);
+       if (ret < 0)
+               return -1;
+
+       /* Skipping stage 0 without authenticating is only legal when the
+        * TPG does not enforce authentication. */
+       if (!login->auth_complete &&
+            ISCSI_TPG_ATTRIB(ISCSI_TPG_C(conn))->authentication) {
+               pr_err("Initiator is requesting CSG: 1, has not been"
+                        " successfully authenticated, and the Target is"
+                       " enforcing iSCSI Authentication, login failed.\n");
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+                               ISCSI_LOGIN_STATUS_AUTH_FAILED);
+               return -1;
+       }
+
+       if (!iscsi_check_negotiated_keys(conn->param_list))
+               if ((login_req->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE3) &&
+                   (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT))
+                       login_rsp->flags |= ISCSI_FLAG_LOGIN_NEXT_STAGE3 |
+                                           ISCSI_FLAG_LOGIN_TRANSIT;
+
+       return 0;
+}
+
+/*
+ * Main login negotiation loop: process up to MAX_LOGIN_PDUS Login
+ * Request/Response exchanges, dispatching on the request's Current
+ * Stage (CSG), until the initiator transits out of stage 1.
+ *
+ * Returns 0 on successful completion of the login phase, -1 on error.
+ */
+static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *login)
+{
+       int pdu_count = 0;
+       struct iscsi_login_req *login_req;
+       struct iscsi_login_rsp *login_rsp;
+
+       login_req = (struct iscsi_login_req *) login->req;
+       login_rsp = (struct iscsi_login_rsp *) login->rsp;
+
+       while (1) {
+               if (++pdu_count > MAX_LOGIN_PDUS) {
+                       pr_err("MAX_LOGIN_PDUS count reached.\n");
+                       iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                                       ISCSI_LOGIN_STATUS_TARGET_ERROR);
+                       return -1;
+               }
+
+               switch ((login_req->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2) {
+               case 0:
+                       /* NOTE(review): (0 & mask) is always 0, so this OR is
+                        * a no-op — apparently kept for symmetry with case 1. */
+                       login_rsp->flags |= (0 & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK);
+                       if (iscsi_target_handle_csg_zero(conn, login) < 0)
+                               return -1;
+                       break;
+               case 1:
+                       login_rsp->flags |= ISCSI_FLAG_LOGIN_CURRENT_STAGE1;
+                       if (iscsi_target_handle_csg_one(conn, login) < 0)
+                               return -1;
+                       if (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) {
+                               login->tsih = conn->sess->tsih;
+                               if (iscsi_target_do_tx_login_io(conn,
+                                               login) < 0)
+                                       return -1;
+                               return 0;
+                       }
+                       break;
+               default:
+                       /* NOTE(review): an illegal CSG only logs and falls
+                        * through to another exchange — confirm intended. */
+                       pr_err("Illegal CSG: %d received from"
+                               " Initiator, protocol error.\n",
+                               (login_req->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK)
+                               >> 2);
+                       break;
+               }
+
+               if (iscsi_target_do_login_io(conn, login) < 0)
+                       return -1;
+
+               if (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) {
+                       login_rsp->flags &= ~ISCSI_FLAG_LOGIN_TRANSIT;
+                       login_rsp->flags &= ~ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK;
+               }
+       }
+
+       /* Unreachable: the while (1) loop above only exits via return. */
+       return 0;
+}
+
+/*
+ * Lowercase an initiator IQN in place; RFC-3720 3.2.6.1 defines iSCSI
+ * names as case-insensitive, so a canonical form simplifies compares.
+ */
+static void iscsi_initiatorname_tolower(
+       char *param_buf)
+{
+       u32 i, iqn_size = strlen(param_buf);
+
+       for (i = 0; i < iqn_size; i++) {
+               if (isupper(param_buf[i]))
+                       param_buf[i] = tolower(param_buf[i]);
+       }
+}
+
+/*
+ * Processes the first Login Request..
+ */
+static int iscsi_target_locate_portal(
+       struct iscsi_np *np,
+       struct iscsi_conn *conn,
+       struct iscsi_login *login)
+{
+       char *i_buf = NULL, *s_buf = NULL, *t_buf = NULL;
+       char *tmpbuf, *start = NULL, *end = NULL, *key, *value;
+       struct iscsi_session *sess = conn->sess;
+       struct iscsi_tiqn *tiqn;
+       struct iscsi_login_req *login_req;
+       struct iscsi_targ_login_rsp *login_rsp;
+       u32 payload_length;
+       int sessiontype = 0, ret = 0;
+
+       login_req = (struct iscsi_login_req *) login->req;
+       login_rsp = (struct iscsi_targ_login_rsp *) login->rsp;
+       payload_length = ntoh24(login_req->dlength);
+
+       login->first_request    = 1;
+       login->leading_connection = (!login_req->tsih) ? 1 : 0;
+       login->current_stage    =
+               (login_req->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2;
+       login->version_min      = login_req->min_version;
+       login->version_max      = login_req->max_version;
+       memcpy(login->isid, login_req->isid, 6);
+       login->cmd_sn           = login_req->cmdsn;
+       login->init_task_tag    = login_req->itt;
+       login->initial_exp_statsn = login_req->exp_statsn;
+       login->cid              = login_req->cid;
+       login->tsih             = login_req->tsih;
+
+       if (iscsi_target_get_initial_payload(conn, login) < 0)
+               return -1;
+
+       tmpbuf = kzalloc(payload_length + 1, GFP_KERNEL);
+       if (!tmpbuf) {
+               pr_err("Unable to allocate memory for tmpbuf.\n");
+               return -1;
+       }
+
+       memcpy(tmpbuf, login->req_buf, payload_length);
+       tmpbuf[payload_length] = '\0';
+       start = tmpbuf;
+       end = (start + payload_length);
+
+       /*
+        * Locate the initial keys expected from the Initiator node in
+        * the first login request in order to progress with the login phase.
+        */
+       while (start < end) {
+               if (iscsi_extract_key_value(start, &key, &value) < 0) {
+                       ret = -1;
+                       goto out;
+               }
+
+               if (!strncmp(key, "InitiatorName", 13))
+                       i_buf = value;
+               else if (!strncmp(key, "SessionType", 11))
+                       s_buf = value;
+               else if (!strncmp(key, "TargetName", 10))
+                       t_buf = value;
+
+               start += strlen(key) + strlen(value) + 2;
+       }
+
+       /*
+        * See 5.3.  Login Phase.
+        */
+       if (!i_buf) {
+               pr_err("InitiatorName key not received"
+                       " in first login request.\n");
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+                       ISCSI_LOGIN_STATUS_MISSING_FIELDS);
+               ret = -1;
+               goto out;
+       }
+       /*
+        * Convert the incoming InitiatorName to lowercase following
+        * RFC-3720 3.2.6.1. section c) that says that iSCSI IQNs
+        * are NOT case sensitive.
+        */
+       iscsi_initiatorname_tolower(i_buf);
+
+       if (!s_buf) {
+               if (!login->leading_connection)
+                       goto get_target;
+
+               pr_err("SessionType key not received"
+                       " in first login request.\n");
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+                       ISCSI_LOGIN_STATUS_MISSING_FIELDS);
+               ret = -1;
+               goto out;
+       }
+
+       /*
+        * Use default portal group for discovery sessions.
+        */
+       sessiontype = strncmp(s_buf, DISCOVERY, 9);
+       if (!sessiontype) {
+               conn->tpg = iscsit_global->discovery_tpg;
+               if (!login->leading_connection)
+                       goto get_target;
+
+               sess->sess_ops->SessionType = 1;
+               /*
+                * Setup crc32c modules from libcrypto
+                */
+               if (iscsi_login_setup_crypto(conn) < 0) {
+                       pr_err("iscsi_login_setup_crypto() failed\n");
+                       ret = -1;
+                       goto out;
+               }
+               /*
+                * Serialize access across the discovery struct iscsi_portal_group to
+                * process login attempt.
+                */
+               if (iscsit_access_np(np, conn->tpg) < 0) {
+                       iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                               ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE);
+                       ret = -1;
+                       goto out;
+               }
+               ret = 0;
+               goto out;
+       }
+
+get_target:
+       if (!t_buf) {
+               pr_err("TargetName key not received"
+                       " in first login request while"
+                       " SessionType=Normal.\n");
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+                       ISCSI_LOGIN_STATUS_MISSING_FIELDS);
+               ret = -1;
+               goto out;
+       }
+
+       /*
+        * Locate Target IQN from Storage Node.
+        */
+       tiqn = iscsit_get_tiqn_for_login(t_buf);
+       if (!tiqn) {
+               pr_err("Unable to locate Target IQN: %s in"
+                       " Storage Node\n", t_buf);
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                               ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE);
+               ret = -1;
+               goto out;
+       }
+       pr_debug("Located Storage Object: %s\n", tiqn->tiqn);
+
+       /*
+        * Locate Target Portal Group from Storage Node.
+        */
+       conn->tpg = iscsit_get_tpg_from_np(tiqn, np);
+       if (!conn->tpg) {
+               pr_err("Unable to locate Target Portal Group"
+                               " on %s\n", tiqn->tiqn);
+               iscsit_put_tiqn_for_login(tiqn);
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                               ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE);
+               ret = -1;
+               goto out;
+       }
+       pr_debug("Located Portal Group Object: %hu\n", conn->tpg->tpgt);
+       /*
+        * Setup crc32c modules from libcrypto
+        */
+       if (iscsi_login_setup_crypto(conn) < 0) {
+               pr_err("iscsi_login_setup_crypto() failed\n");
+               ret = -1;
+               goto out;
+       }
+       /*
+        * Serialize access across the struct iscsi_portal_group to
+        * process login attempt.
+        */
+       if (iscsit_access_np(np, conn->tpg) < 0) {
+               iscsit_put_tiqn_for_login(tiqn);
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                               ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE);
+               ret = -1;
+               conn->tpg = NULL;
+               goto out;
+       }
+
+       /*
+        * conn->sess->node_acl will be set when the referenced
+        * struct iscsi_session is located from received ISID+TSIH in
+        * iscsi_login_non_zero_tsih_s2().
+        */
+       if (!login->leading_connection) {
+               ret = 0;
+               goto out;
+       }
+
+       /*
+        * This value is required in iscsi_login_zero_tsih_s2()
+        */
+       sess->sess_ops->SessionType = 0;
+
+       /*
+        * Locate incoming Initiator IQN reference from Storage Node.
+        */
+       sess->se_sess->se_node_acl = core_tpg_check_initiator_node_acl(
+                       &conn->tpg->tpg_se_tpg, i_buf);
+       if (!sess->se_sess->se_node_acl) {
+               pr_err("iSCSI Initiator Node: %s is not authorized to"
+                       " access iSCSI target portal group: %hu.\n",
+                               i_buf, conn->tpg->tpgt);
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+                               ISCSI_LOGIN_STATUS_TGT_FORBIDDEN);
+               ret = -1;
+               goto out;
+       }
+
+       ret = 0;
+out:
+       kfree(tmpbuf);
+       return ret;
+}
+
+/*
+ * Allocate the login negotiation context for a new connection: the
+ * struct iscsi_login itself, a copy of the received Login Request PDU
+ * header, and the request key=value payload buffer, then locate the
+ * target portal group (discovery or normal) for this login.
+ *
+ * Returns the new login context, or NULL on failure (an error Login
+ * Response has already been transmitted where appropriate).
+ */
+struct iscsi_login *iscsi_target_init_negotiation(
+       struct iscsi_np *np,
+       struct iscsi_conn *conn,
+       char *login_pdu)
+{
+       struct iscsi_login *login;
+
+       login = kzalloc(sizeof(struct iscsi_login), GFP_KERNEL);
+       if (!login) {
+               pr_err("Unable to allocate memory for struct iscsi_login.\n");
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                               ISCSI_LOGIN_STATUS_NO_RESOURCES);
+               /*
+                * Bug fix: must not take the common exit path here, as
+                * it dereferences the NULL login pointer.
+                */
+               return NULL;
+       }
+
+       login->req = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL);
+       if (!login->req) {
+               pr_err("Unable to allocate memory for Login Request.\n");
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                               ISCSI_LOGIN_STATUS_NO_RESOURCES);
+               goto out;
+       }
+       memcpy(login->req, login_pdu, ISCSI_HDR_LEN);
+
+       login->req_buf = kzalloc(MAX_KEY_VALUE_PAIRS, GFP_KERNEL);
+       if (!login->req_buf) {
+               pr_err("Unable to allocate memory for request buffer.\n");
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                               ISCSI_LOGIN_STATUS_NO_RESOURCES);
+               goto out;
+       }
+       /*
+        * SessionType: Discovery
+        *
+        *      Locates Default Portal
+        *
+        * SessionType: Normal
+        *
+        *      Locates Target Portal from NP -> Target IQN
+        */
+       if (iscsi_target_locate_portal(np, conn, login) < 0) {
+               pr_err("iSCSI Login negotiation failed.\n");
+               goto out;
+       }
+
+       return login;
+out:
+       kfree(login->req);
+       kfree(login->req_buf);
+       kfree(login);
+
+       return NULL;
+}
+
+/*
+ * Drive the login negotiation for @conn: allocate the Login Response
+ * PDU header and response key=value buffer, then run the main
+ * iscsi_target_do_login() state machine.  On any failure the failed
+ * auth entry is removed.  The login context is always released before
+ * returning, so @login must not be used by the caller afterwards.
+ *
+ * Returns the result of iscsi_target_do_login(), or -1 on allocation
+ * failure.
+ */
+int iscsi_target_start_negotiation(
+       struct iscsi_login *login,
+       struct iscsi_conn *conn)
+{
+       int ret = -1;
+
+       login->rsp = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL);
+       if (!login->rsp) {
+               pr_err("Unable to allocate memory for"
+                               " Login Response.\n");
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                               ISCSI_LOGIN_STATUS_NO_RESOURCES);
+               ret = -1;
+               goto out;
+       }
+
+       login->rsp_buf = kzalloc(MAX_KEY_VALUE_PAIRS, GFP_KERNEL);
+       if (!login->rsp_buf) {
+               /* Bug fix: this is the response buffer, not the request one. */
+               pr_err("Unable to allocate memory for"
+                       " response buffer.\n");
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                               ISCSI_LOGIN_STATUS_NO_RESOURCES);
+               ret = -1;
+               goto out;
+       }
+
+       ret = iscsi_target_do_login(conn, login);
+out:
+       if (ret != 0)
+               iscsi_remove_failed_auth_entry(conn);
+
+       iscsi_target_nego_release(login, conn);
+       return ret;
+}
+
+/*
+ * Free both key=value payload buffers, both PDU header copies, and
+ * finally the login context itself.  kfree() tolerates NULL, so this
+ * is safe whichever allocations actually succeeded.
+ */
+void iscsi_target_nego_release(
+       struct iscsi_login *login,
+       struct iscsi_conn *conn)
+{
+       kfree(login->req_buf);
+       kfree(login->rsp_buf);
+       kfree(login->req);
+       kfree(login->rsp);
+       kfree(login);
+}
diff --git a/drivers/target/iscsi/iscsi_target_nego.h b/drivers/target/iscsi/iscsi_target_nego.h
new file mode 100644 (file)
index 0000000..92e133a
--- /dev/null
@@ -0,0 +1,17 @@
+#ifndef ISCSI_TARGET_NEGO_H
+#define ISCSI_TARGET_NEGO_H
+
+#define DECIMAL         0
+#define HEX             1
+
+extern void convert_null_to_semi(char *, int);
+extern int extract_param(const char *, const char *, unsigned int, char *,
+               unsigned char *);
+extern struct iscsi_login *iscsi_target_init_negotiation(
+               struct iscsi_np *, struct iscsi_conn *, char *);
+extern int iscsi_target_start_negotiation(
+               struct iscsi_login *, struct iscsi_conn *);
+extern void iscsi_target_nego_release(
+               struct iscsi_login *, struct iscsi_conn *);
+
+#endif /* ISCSI_TARGET_NEGO_H */
diff --git a/drivers/target/iscsi/iscsi_target_nodeattrib.c b/drivers/target/iscsi/iscsi_target_nodeattrib.c
new file mode 100644 (file)
index 0000000..aeafbe0
--- /dev/null
@@ -0,0 +1,263 @@
+/*******************************************************************************
+ * This file contains the main functions related to Initiator Node Attributes.
+ *
+ * (c) Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target_nodeattrib.h"
+
+/*
+ * Return the InitiatorName string stored in the generic se_node_acl
+ * embedded inside @nacl.
+ */
+static inline char *iscsit_na_get_initiatorname(
+       struct iscsi_node_acl *nacl)
+{
+       return nacl->se_node_acl.initiatorname;
+}
+
+/*
+ * Install the compile-time default value for every per-initiator node
+ * attribute on a freshly created ACL.
+ * NOTE(review): "attribues" in the function name is a typo, but the
+ * header declares it with the same spelling; rename both together.
+ */
+void iscsit_set_default_node_attribues(
+       struct iscsi_node_acl *acl)
+{
+       struct iscsi_node_attrib *a = &acl->node_attrib;
+
+       a->dataout_timeout = NA_DATAOUT_TIMEOUT;
+       a->dataout_timeout_retries = NA_DATAOUT_TIMEOUT_RETRIES;
+       a->nopin_timeout = NA_NOPIN_TIMEOUT;
+       a->nopin_response_timeout = NA_NOPIN_RESPONSE_TIMEOUT;
+       a->random_datain_pdu_offsets = NA_RANDOM_DATAIN_PDU_OFFSETS;
+       a->random_datain_seq_offsets = NA_RANDOM_DATAIN_SEQ_OFFSETS;
+       a->random_r2t_offsets = NA_RANDOM_R2T_OFFSETS;
+       a->default_erl = NA_DEFAULT_ERL;
+}
+
+/*
+ * Validate and set the per-node DataOut timeout.
+ * Returns 0 on success or -EINVAL when the value lies outside the
+ * allowed [min, max] range.
+ * NOTE(review): NA_DATAOUT_TIMEOUT_MIX looks like a typo for
+ * ..._MIN — confirm against its definition before renaming.  The
+ * "extern" on this definition is redundant.
+ */
+extern int iscsit_na_dataout_timeout(
+       struct iscsi_node_acl *acl,
+       u32 dataout_timeout)
+{
+       struct iscsi_node_attrib *a = &acl->node_attrib;
+
+       if (dataout_timeout > NA_DATAOUT_TIMEOUT_MAX) {
+               pr_err("Requested DataOut Timeout %u larger than"
+                       " maximum %u\n", dataout_timeout,
+                       NA_DATAOUT_TIMEOUT_MAX);
+               return -EINVAL;
+       } else if (dataout_timeout < NA_DATAOUT_TIMEOUT_MIX) {
+               pr_err("Requested DataOut Timeout %u smaller than"
+                       " minimum %u\n", dataout_timeout,
+                       NA_DATAOUT_TIMEOUT_MIX);
+               return -EINVAL;
+       }
+
+       a->dataout_timeout = dataout_timeout;
+       pr_debug("Set DataOut Timeout to %u for Initiator Node"
+               " %s\n", a->dataout_timeout, iscsit_na_get_initiatorname(acl));
+
+       return 0;
+}
+
+/*
+ * Validate and set the number of DataOut timeout retries for the
+ * node.  Returns 0 on success or -EINVAL when the value lies outside
+ * [NA_DATAOUT_TIMEOUT_RETRIES_MIN, NA_DATAOUT_TIMEOUT_RETRIES_MAX].
+ */
+extern int iscsit_na_dataout_timeout_retries(
+       struct iscsi_node_acl *acl,
+       u32 dataout_timeout_retries)
+{
+       struct iscsi_node_attrib *a = &acl->node_attrib;
+
+       if (dataout_timeout_retries > NA_DATAOUT_TIMEOUT_RETRIES_MAX) {
+               /* Bug fix: both error messages lacked a trailing newline. */
+               pr_err("Requested DataOut Timeout Retries %u larger"
+                       " than maximum %u\n", dataout_timeout_retries,
+                               NA_DATAOUT_TIMEOUT_RETRIES_MAX);
+               return -EINVAL;
+       } else if (dataout_timeout_retries < NA_DATAOUT_TIMEOUT_RETRIES_MIN) {
+               pr_err("Requested DataOut Timeout Retries %u smaller"
+                       " than minimum %u\n", dataout_timeout_retries,
+                               NA_DATAOUT_TIMEOUT_RETRIES_MIN);
+               return -EINVAL;
+       }
+
+       a->dataout_timeout_retries = dataout_timeout_retries;
+       pr_debug("Set DataOut Timeout Retries to %u for"
+               " Initiator Node %s\n", a->dataout_timeout_retries,
+               iscsit_na_get_initiatorname(acl));
+
+       return 0;
+}
+
+/*
+ * Validate and set the per-node NopIn timeout.  A value of 0 disables
+ * NopIn pings; otherwise the value must lie in
+ * [NA_NOPIN_TIMEOUT_MIN, NA_NOPIN_TIMEOUT_MAX].
+ *
+ * When the timeout transitions from 0 (disabled) to non-zero, the
+ * nopin timer is restarted on every logged-in connection of the
+ * node's active session.  Lock nesting below is:
+ * nacl_sess_lock (bh) -> sess->conn_lock -> conn->nopin_timer_lock.
+ *
+ * Returns 0 on success or -EINVAL on a bad value.
+ */
+extern int iscsit_na_nopin_timeout(
+       struct iscsi_node_acl *acl,
+       u32 nopin_timeout)
+{
+       struct iscsi_node_attrib *a = &acl->node_attrib;
+       struct iscsi_session *sess;
+       struct iscsi_conn *conn;
+       struct se_node_acl *se_nacl = &a->nacl->se_node_acl;
+       struct se_session *se_sess;
+       u32 orig_nopin_timeout = a->nopin_timeout;
+
+       if (nopin_timeout > NA_NOPIN_TIMEOUT_MAX) {
+               pr_err("Requested NopIn Timeout %u larger than maximum"
+                       " %u\n", nopin_timeout, NA_NOPIN_TIMEOUT_MAX);
+               return -EINVAL;
+       } else if ((nopin_timeout < NA_NOPIN_TIMEOUT_MIN) &&
+                  (nopin_timeout != 0)) {
+               pr_err("Requested NopIn Timeout %u smaller than"
+                       " minimum %u and not 0\n", nopin_timeout,
+                       NA_NOPIN_TIMEOUT_MIN);
+               return -EINVAL;
+       }
+
+       a->nopin_timeout = nopin_timeout;
+       pr_debug("Set NopIn Timeout to %u for Initiator"
+               " Node %s\n", a->nopin_timeout,
+               iscsit_na_get_initiatorname(acl));
+       /*
+        * Reenable disabled nopin_timeout timer for all iSCSI connections.
+        */
+       if (!orig_nopin_timeout) {
+               spin_lock_bh(&se_nacl->nacl_sess_lock);
+               se_sess = se_nacl->nacl_sess;
+               if (se_sess) {
+                       sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+
+                       spin_lock(&sess->conn_lock);
+                       list_for_each_entry(conn, &sess->sess_conn_list,
+                                       conn_list) {
+                               /* Only connections in full feature phase
+                                * have a nopin timer to restart. */
+                               if (conn->conn_state !=
+                                               TARG_CONN_STATE_LOGGED_IN)
+                                       continue;
+
+                               spin_lock(&conn->nopin_timer_lock);
+                               __iscsit_start_nopin_timer(conn);
+                               spin_unlock(&conn->nopin_timer_lock);
+                       }
+                       spin_unlock(&sess->conn_lock);
+               }
+               spin_unlock_bh(&se_nacl->nacl_sess_lock);
+       }
+
+       return 0;
+}
+
+/*
+ * Validate and set the per-node NopIn response timeout.  Returns 0 on
+ * success or -EINVAL when the value lies outside
+ * [NA_NOPIN_RESPONSE_TIMEOUT_MIN, NA_NOPIN_RESPONSE_TIMEOUT_MAX].
+ */
+extern int iscsit_na_nopin_response_timeout(
+       struct iscsi_node_acl *acl,
+       u32 nopin_response_timeout)
+{
+       struct iscsi_node_attrib *a = &acl->node_attrib;
+
+       if (nopin_response_timeout > NA_NOPIN_RESPONSE_TIMEOUT_MAX) {
+               pr_err("Requested NopIn Response Timeout %u larger"
+                       " than maximum %u\n", nopin_response_timeout,
+                               NA_NOPIN_RESPONSE_TIMEOUT_MAX);
+               return -EINVAL;
+       } else if (nopin_response_timeout < NA_NOPIN_RESPONSE_TIMEOUT_MIN) {
+               pr_err("Requested NopIn Response Timeout %u smaller"
+                       " than minimum %u\n", nopin_response_timeout,
+                               NA_NOPIN_RESPONSE_TIMEOUT_MIN);
+               return -EINVAL;
+       }
+
+       a->nopin_response_timeout = nopin_response_timeout;
+       /* Bug fix: log the value just set, not a->nopin_timeout. */
+       pr_debug("Set NopIn Response Timeout to %u for"
+               " Initiator Node %s\n", a->nopin_response_timeout,
+               iscsit_na_get_initiatorname(acl));
+
+       return 0;
+}
+
+/*
+ * Set the boolean Random DataIN PDU Offsets attribute; only 0 or 1 is
+ * accepted.  Returns 0 on success or -EINVAL.
+ */
+extern int iscsit_na_random_datain_pdu_offsets(
+       struct iscsi_node_acl *acl,
+       u32 random_datain_pdu_offsets)
+{
+       struct iscsi_node_attrib *a = &acl->node_attrib;
+
+       if (random_datain_pdu_offsets != 0 && random_datain_pdu_offsets != 1) {
+               pr_err("Requested Random DataIN PDU Offsets: %u not"
+                       " 0 or 1\n", random_datain_pdu_offsets);
+               return -EINVAL;
+       }
+
+       a->random_datain_pdu_offsets = random_datain_pdu_offsets;
+       pr_debug("Set Random DataIN PDU Offsets to %u for"
+               " Initiator Node %s\n", a->random_datain_pdu_offsets,
+               iscsit_na_get_initiatorname(acl));
+
+       return 0;
+}
+
+/*
+ * Set the boolean Random DataIN Sequence Offsets attribute; only 0 or
+ * 1 is accepted.  Returns 0 on success or -EINVAL.
+ */
+extern int iscsit_na_random_datain_seq_offsets(
+       struct iscsi_node_acl *acl,
+       u32 random_datain_seq_offsets)
+{
+       struct iscsi_node_attrib *a = &acl->node_attrib;
+
+       if (random_datain_seq_offsets != 0 && random_datain_seq_offsets != 1) {
+               pr_err("Requested Random DataIN Sequence Offsets: %u"
+                       " not 0 or 1\n", random_datain_seq_offsets);
+               return -EINVAL;
+       }
+
+       a->random_datain_seq_offsets = random_datain_seq_offsets;
+       pr_debug("Set Random DataIN Sequence Offsets to %u for"
+               " Initiator Node %s\n", a->random_datain_seq_offsets,
+               iscsit_na_get_initiatorname(acl));
+
+       return 0;
+}
+
+/*
+ * Set the boolean Random R2T Offsets attribute; only 0 or 1 is
+ * accepted.  Returns 0 on success or -EINVAL.
+ */
+extern int iscsit_na_random_r2t_offsets(
+       struct iscsi_node_acl *acl,
+       u32 random_r2t_offsets)
+{
+       struct iscsi_node_attrib *a = &acl->node_attrib;
+
+       if (random_r2t_offsets != 0 && random_r2t_offsets != 1) {
+               pr_err("Requested Random R2T Offsets: %u not"
+                       " 0 or 1\n", random_r2t_offsets);
+               return -EINVAL;
+       }
+
+       a->random_r2t_offsets = random_r2t_offsets;
+       pr_debug("Set Random R2T Offsets to %u for"
+               " Initiator Node %s\n", a->random_r2t_offsets,
+               iscsit_na_get_initiatorname(acl));
+
+       return 0;
+}
+
+/*
+ * Validate and set the node's default ErrorRecoveryLevel, which must
+ * be 0, 1 or 2.  Returns 0 on success or -EINVAL.
+ */
+extern int iscsit_na_default_erl(
+       struct iscsi_node_acl *acl,
+       u32 default_erl)
+{
+       struct iscsi_node_attrib *a = &acl->node_attrib;
+
+       if (default_erl != 0 && default_erl != 1 && default_erl != 2) {
+               pr_err("Requested default ERL: %u not 0, 1, or 2\n",
+                               default_erl);
+               return -EINVAL;
+       }
+
+       a->default_erl = default_erl;
+       /* Message fix: this sets the default ERL, not an "ERL0 flag". */
+       pr_debug("Set default ERL to %u for Initiator"
+               " Node %s\n", a->default_erl,
+               iscsit_na_get_initiatorname(acl));
+
+       return 0;
+}
diff --git a/drivers/target/iscsi/iscsi_target_nodeattrib.h b/drivers/target/iscsi/iscsi_target_nodeattrib.h
new file mode 100644 (file)
index 0000000..c970b32
--- /dev/null
@@ -0,0 +1,14 @@
+#ifndef ISCSI_TARGET_NODEATTRIB_H
+#define ISCSI_TARGET_NODEATTRIB_H
+
+extern void iscsit_set_default_node_attribues(struct iscsi_node_acl *);
+extern int iscsit_na_dataout_timeout(struct iscsi_node_acl *, u32);
+extern int iscsit_na_dataout_timeout_retries(struct iscsi_node_acl *, u32);
+extern int iscsit_na_nopin_timeout(struct iscsi_node_acl *, u32);
+extern int iscsit_na_nopin_response_timeout(struct iscsi_node_acl *, u32);
+extern int iscsit_na_random_datain_pdu_offsets(struct iscsi_node_acl *, u32);
+extern int iscsit_na_random_datain_seq_offsets(struct iscsi_node_acl *, u32);
+extern int iscsit_na_random_r2t_offsets(struct iscsi_node_acl *, u32);
+extern int iscsit_na_default_erl(struct iscsi_node_acl *, u32);
+
+#endif /* ISCSI_TARGET_NODEATTRIB_H */
diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c
new file mode 100644 (file)
index 0000000..252e246
--- /dev/null
@@ -0,0 +1,1905 @@
+/*******************************************************************************
+ * This file contains main functions related to iSCSI Parameter negotiation.
+ *
+ * (c) Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/slab.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target_parameters.h"
+
+/*
+ * Receive exactly @length bytes of login payload from @conn into
+ * @buf, advancing conn->of_marker for the initial marker-less
+ * interval.  Returns 0 on success or -1 on a short read.
+ */
+int iscsi_login_rx_data(
+       struct iscsi_conn *conn,
+       char *buf,
+       int length)
+{
+       int rx_got;
+       struct kvec iov;
+
+       memset(&iov, 0, sizeof(struct kvec));
+       iov.iov_len     = length;
+       iov.iov_base    = buf;
+
+       /*
+        * Initial Marker-less Interval.
+        * Add the values regardless of IFMarker/OFMarker, considering
+        * it may not be negotiated yet.
+        */
+       conn->of_marker += length;
+
+       rx_got = rx_data(conn, &iov, 1, length);
+       if (rx_got != length) {
+               pr_err("rx_data returned %d, expecting %d.\n",
+                               rx_got, length);
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Transmit a login PDU: the fixed-size header from @pdu_buf followed
+ * by @text_length bytes of key=value text from @text_buf, advancing
+ * conn->if_marker for the initial marker-less interval.
+ * Returns 0 on success or -1 on a short write.
+ */
+int iscsi_login_tx_data(
+       struct iscsi_conn *conn,
+       char *pdu_buf,
+       char *text_buf,
+       int text_length)
+{
+       int length, tx_sent;
+       struct kvec iov[2];
+
+       length = (ISCSI_HDR_LEN + text_length);
+
+       memset(&iov[0], 0, 2 * sizeof(struct kvec));
+       iov[0].iov_len          = ISCSI_HDR_LEN;
+       iov[0].iov_base         = pdu_buf;
+       iov[1].iov_len          = text_length;
+       iov[1].iov_base         = text_buf;
+
+       /*
+        * Initial Marker-less Interval.
+        * Add the values regardless of IFMarker/OFMarker, considering
+        * it may not be negotiated yet.
+        */
+       conn->if_marker += length;
+
+       tx_sent = tx_data(conn, &iov[0], 2, length);
+       if (tx_sent != length) {
+               pr_err("tx_data returned %d, expecting %d.\n",
+                               tx_sent, length);
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Log the negotiated per-connection parameters at debug level.
+ * Marker intervals are only printed when the corresponding marker is
+ * enabled.
+ */
+void iscsi_dump_conn_ops(struct iscsi_conn_ops *conn_ops)
+{
+       pr_debug("HeaderDigest: %s\n", (conn_ops->HeaderDigest) ?
+                               "CRC32C" : "None");
+       pr_debug("DataDigest: %s\n", (conn_ops->DataDigest) ?
+                               "CRC32C" : "None");
+       pr_debug("MaxRecvDataSegmentLength: %u\n",
+                               conn_ops->MaxRecvDataSegmentLength);
+       pr_debug("OFMarker: %s\n", (conn_ops->OFMarker) ? "Yes" : "No");
+       pr_debug("IFMarker: %s\n", (conn_ops->IFMarker) ? "Yes" : "No");
+       if (conn_ops->OFMarker)
+               pr_debug("OFMarkInt: %u\n", conn_ops->OFMarkInt);
+       if (conn_ops->IFMarker)
+               pr_debug("IFMarkInt: %u\n", conn_ops->IFMarkInt);
+}
+
+/*
+ * Log the negotiated per-session parameters at debug level.
+ */
+void iscsi_dump_sess_ops(struct iscsi_sess_ops *sess_ops)
+{
+       pr_debug("InitiatorName: %s\n", sess_ops->InitiatorName);
+       pr_debug("InitiatorAlias: %s\n", sess_ops->InitiatorAlias);
+       pr_debug("TargetName: %s\n", sess_ops->TargetName);
+       pr_debug("TargetAlias: %s\n", sess_ops->TargetAlias);
+       pr_debug("TargetPortalGroupTag: %hu\n",
+                       sess_ops->TargetPortalGroupTag);
+       pr_debug("MaxConnections: %hu\n", sess_ops->MaxConnections);
+       pr_debug("InitialR2T: %s\n",
+                       (sess_ops->InitialR2T) ? "Yes" : "No");
+       pr_debug("ImmediateData: %s\n", (sess_ops->ImmediateData) ?
+                       "Yes" : "No");
+       pr_debug("MaxBurstLength: %u\n", sess_ops->MaxBurstLength);
+       pr_debug("FirstBurstLength: %u\n", sess_ops->FirstBurstLength);
+       pr_debug("DefaultTime2Wait: %hu\n", sess_ops->DefaultTime2Wait);
+       pr_debug("DefaultTime2Retain: %hu\n",
+                       sess_ops->DefaultTime2Retain);
+       pr_debug("MaxOutstandingR2T: %hu\n",
+                       sess_ops->MaxOutstandingR2T);
+       pr_debug("DataPDUInOrder: %s\n",
+                       (sess_ops->DataPDUInOrder) ? "Yes" : "No");
+       pr_debug("DataSequenceInOrder: %s\n",
+                       (sess_ops->DataSequenceInOrder) ? "Yes" : "No");
+       pr_debug("ErrorRecoveryLevel: %hu\n",
+                       sess_ops->ErrorRecoveryLevel);
+       pr_debug("SessionType: %s\n", (sess_ops->SessionType) ?
+                       "Discovery" : "Normal");
+}
+
+/*
+ * Log every parameter name/value pair in @param_list at debug level.
+ */
+void iscsi_print_params(struct iscsi_param_list *param_list)
+{
+       struct iscsi_param *param;
+
+       list_for_each_entry(param, &param_list->param_list, p_list)
+               pr_debug("%s: %s\n", param->name, param->value);
+}
+
+/*
+ * Allocate a struct iscsi_param, duplicate the given @name/@value
+ * strings, derive ->type from @type_range, and append the new entry
+ * to the tail of param_list->param_list.
+ *
+ * Returns the new parameter on success, or NULL on allocation failure
+ * or an unknown type_range (partially built entries are freed).
+ */
+static struct iscsi_param *iscsi_set_default_param(struct iscsi_param_list *param_list,
+               char *name, char *value, u8 phase, u8 scope, u8 sender,
+               u16 type_range, u8 use)
+{
+       struct iscsi_param *param = NULL;
+
+       param = kzalloc(sizeof(struct iscsi_param), GFP_KERNEL);
+       if (!param) {
+               pr_err("Unable to allocate memory for parameter.\n");
+               goto out;
+       }
+       INIT_LIST_HEAD(&param->p_list);
+
+       /*
+        * kstrdup() replaces the open-coded kzalloc() + memcpy() +
+        * explicit NUL termination for both strings.
+        */
+       param->name = kstrdup(name, GFP_KERNEL);
+       if (!param->name) {
+               pr_err("Unable to allocate memory for parameter name.\n");
+               goto out;
+       }
+
+       param->value = kstrdup(value, GFP_KERNEL);
+       if (!param->value) {
+               pr_err("Unable to allocate memory for parameter value.\n");
+               goto out;
+       }
+
+       param->phase            = phase;
+       param->scope            = scope;
+       param->sender           = sender;
+       param->use              = use;
+       param->type_range       = type_range;
+
+       switch (param->type_range) {
+       case TYPERANGE_BOOL_AND:
+               param->type = TYPE_BOOL_AND;
+               break;
+       case TYPERANGE_BOOL_OR:
+               param->type = TYPE_BOOL_OR;
+               break;
+       case TYPERANGE_0_TO_2:
+       case TYPERANGE_0_TO_3600:
+       case TYPERANGE_0_TO_32767:
+       case TYPERANGE_0_TO_65535:
+       case TYPERANGE_1_TO_65535:
+       case TYPERANGE_2_TO_3600:
+       case TYPERANGE_512_TO_16777215:
+               param->type = TYPE_NUMBER;
+               break;
+       case TYPERANGE_AUTH:
+       case TYPERANGE_DIGEST:
+               param->type = TYPE_VALUE_LIST | TYPE_STRING;
+               break;
+       case TYPERANGE_MARKINT:
+               param->type = TYPE_NUMBER_RANGE;
+               param->type_range |= TYPERANGE_1_TO_65535;
+               break;
+       case TYPERANGE_ISCSINAME:
+       case TYPERANGE_SESSIONTYPE:
+       case TYPERANGE_TARGETADDRESS:
+       case TYPERANGE_UTF8:
+               param->type = TYPE_STRING;
+               break;
+       default:
+               pr_err("Unknown type_range 0x%02x\n",
+                               param->type_range);
+               goto out;
+       }
+       list_add_tail(&param->p_list, &param_list->param_list);
+
+       return param;
+out:
+       if (param) {
+               kfree(param->value);
+               kfree(param->name);
+               kfree(param);
+       }
+
+       return NULL;
+}
+
+/* #warning Add extension keys */
+int iscsi_create_default_params(struct iscsi_param_list **param_list_ptr)
+{
+       struct iscsi_param *param = NULL;
+       struct iscsi_param_list *pl;
+
+       pl = kzalloc(sizeof(struct iscsi_param_list), GFP_KERNEL);
+       if (!pl) {
+               pr_err("Unable to allocate memory for"
+                               " struct iscsi_param_list.\n");
+               return -1 ;
+       }
+       INIT_LIST_HEAD(&pl->param_list);
+       INIT_LIST_HEAD(&pl->extra_response_list);
+
+       /*
+        * The format for setting the initial parameter definitions are:
+        *
+        * Parameter name:
+        * Initial value:
+        * Allowable phase:
+        * Scope:
+        * Allowable senders:
+        * Typerange:
+        * Use:
+        */
+       param = iscsi_set_default_param(pl, AUTHMETHOD, INITIAL_AUTHMETHOD,
+                       PHASE_SECURITY, SCOPE_CONNECTION_ONLY, SENDER_BOTH,
+                       TYPERANGE_AUTH, USE_INITIAL_ONLY);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, HEADERDIGEST, INITIAL_HEADERDIGEST,
+                       PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH,
+                       TYPERANGE_DIGEST, USE_INITIAL_ONLY);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, DATADIGEST, INITIAL_DATADIGEST,
+                       PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH,
+                       TYPERANGE_DIGEST, USE_INITIAL_ONLY);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, MAXCONNECTIONS,
+                       INITIAL_MAXCONNECTIONS, PHASE_OPERATIONAL,
+                       SCOPE_SESSION_WIDE, SENDER_BOTH,
+                       TYPERANGE_1_TO_65535, USE_LEADING_ONLY);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, SENDTARGETS, INITIAL_SENDTARGETS,
+                       PHASE_FFP0, SCOPE_SESSION_WIDE, SENDER_INITIATOR,
+                       TYPERANGE_UTF8, 0);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, TARGETNAME, INITIAL_TARGETNAME,
+                       PHASE_DECLARATIVE, SCOPE_SESSION_WIDE, SENDER_BOTH,
+                       TYPERANGE_ISCSINAME, USE_ALL);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, INITIATORNAME,
+                       INITIAL_INITIATORNAME, PHASE_DECLARATIVE,
+                       SCOPE_SESSION_WIDE, SENDER_INITIATOR,
+                       TYPERANGE_ISCSINAME, USE_INITIAL_ONLY);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, TARGETALIAS, INITIAL_TARGETALIAS,
+                       PHASE_DECLARATIVE, SCOPE_SESSION_WIDE, SENDER_TARGET,
+                       TYPERANGE_UTF8, USE_ALL);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, INITIATORALIAS,
+                       INITIAL_INITIATORALIAS, PHASE_DECLARATIVE,
+                       SCOPE_SESSION_WIDE, SENDER_INITIATOR, TYPERANGE_UTF8,
+                       USE_ALL);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, TARGETADDRESS,
+                       INITIAL_TARGETADDRESS, PHASE_DECLARATIVE,
+                       SCOPE_SESSION_WIDE, SENDER_TARGET,
+                       TYPERANGE_TARGETADDRESS, USE_ALL);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, TARGETPORTALGROUPTAG,
+                       INITIAL_TARGETPORTALGROUPTAG,
+                       PHASE_DECLARATIVE, SCOPE_SESSION_WIDE, SENDER_TARGET,
+                       TYPERANGE_0_TO_65535, USE_INITIAL_ONLY);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, INITIALR2T, INITIAL_INITIALR2T,
+                       PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH,
+                       TYPERANGE_BOOL_OR, USE_LEADING_ONLY);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, IMMEDIATEDATA,
+                       INITIAL_IMMEDIATEDATA, PHASE_OPERATIONAL,
+                       SCOPE_SESSION_WIDE, SENDER_BOTH, TYPERANGE_BOOL_AND,
+                       USE_LEADING_ONLY);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, MAXRECVDATASEGMENTLENGTH,
+                       INITIAL_MAXRECVDATASEGMENTLENGTH,
+                       PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH,
+                       TYPERANGE_512_TO_16777215, USE_ALL);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, MAXBURSTLENGTH,
+                       INITIAL_MAXBURSTLENGTH, PHASE_OPERATIONAL,
+                       SCOPE_SESSION_WIDE, SENDER_BOTH,
+                       TYPERANGE_512_TO_16777215, USE_LEADING_ONLY);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, FIRSTBURSTLENGTH,
+                       INITIAL_FIRSTBURSTLENGTH,
+                       PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH,
+                       TYPERANGE_512_TO_16777215, USE_LEADING_ONLY);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, DEFAULTTIME2WAIT,
+                       INITIAL_DEFAULTTIME2WAIT,
+                       PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH,
+                       TYPERANGE_0_TO_3600, USE_LEADING_ONLY);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, DEFAULTTIME2RETAIN,
+                       INITIAL_DEFAULTTIME2RETAIN,
+                       PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH,
+                       TYPERANGE_0_TO_3600, USE_LEADING_ONLY);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, MAXOUTSTANDINGR2T,
+                       INITIAL_MAXOUTSTANDINGR2T,
+                       PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH,
+                       TYPERANGE_1_TO_65535, USE_LEADING_ONLY);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, DATAPDUINORDER,
+                       INITIAL_DATAPDUINORDER, PHASE_OPERATIONAL,
+                       SCOPE_SESSION_WIDE, SENDER_BOTH, TYPERANGE_BOOL_OR,
+                       USE_LEADING_ONLY);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, DATASEQUENCEINORDER,
+                       INITIAL_DATASEQUENCEINORDER,
+                       PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH,
+                       TYPERANGE_BOOL_OR, USE_LEADING_ONLY);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, ERRORRECOVERYLEVEL,
+                       INITIAL_ERRORRECOVERYLEVEL,
+                       PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH,
+                       TYPERANGE_0_TO_2, USE_LEADING_ONLY);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, SESSIONTYPE, INITIAL_SESSIONTYPE,
+                       PHASE_DECLARATIVE, SCOPE_SESSION_WIDE, SENDER_INITIATOR,
+                       TYPERANGE_SESSIONTYPE, USE_LEADING_ONLY);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, IFMARKER, INITIAL_IFMARKER,
+                       PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH,
+                       TYPERANGE_BOOL_AND, USE_INITIAL_ONLY);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, OFMARKER, INITIAL_OFMARKER,
+                       PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH,
+                       TYPERANGE_BOOL_AND, USE_INITIAL_ONLY);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, IFMARKINT, INITIAL_IFMARKINT,
+                       PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH,
+                       TYPERANGE_MARKINT, USE_INITIAL_ONLY);
+       if (!param)
+               goto out;
+
+       param = iscsi_set_default_param(pl, OFMARKINT, INITIAL_OFMARKINT,
+                       PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH,
+                       TYPERANGE_MARKINT, USE_INITIAL_ONLY);
+       if (!param)
+               goto out;
+
+       *param_list_ptr = pl;
+       return 0;
+out:
+       iscsi_release_param_list(pl);
+       return -1;
+}
+
+/*
+ * Reset the negotiation state of every parameter on @param_list and mark
+ * the standard RFC 3720 text keys as needing negotiation during login.
+ *
+ * TargetName, InitiatorName and InitiatorAlias are skipped (declarative
+ * keys supplied by the initiator); TargetAlias is only offered when a
+ * value has actually been configured.
+ *
+ * NOTE(review): the 'sessiontype' argument is not used in this function;
+ * discovery-session trimming appears to be done separately by
+ * iscsi_set_keys_irrelevant_for_discovery().  Confirm with callers
+ * before changing the signature.
+ *
+ * Always returns 0.
+ */
+int iscsi_set_keys_to_negotiate(
+       int sessiontype,
+       struct iscsi_param_list *param_list)
+{
+       struct iscsi_param *param;
+
+       list_for_each_entry(param, &param_list->param_list, p_list) {
+               /* Clear any state left over from a previous negotiation. */
+               param->state = 0;
+               if (!strcmp(param->name, AUTHMETHOD)) {
+                       SET_PSTATE_NEGOTIATE(param);
+               } else if (!strcmp(param->name, HEADERDIGEST)) {
+                       SET_PSTATE_NEGOTIATE(param);
+               } else if (!strcmp(param->name, DATADIGEST)) {
+                       SET_PSTATE_NEGOTIATE(param);
+               } else if (!strcmp(param->name, MAXCONNECTIONS)) {
+                       SET_PSTATE_NEGOTIATE(param);
+               } else if (!strcmp(param->name, TARGETNAME)) {
+                       continue;
+               } else if (!strcmp(param->name, INITIATORNAME)) {
+                       continue;
+               } else if (!strcmp(param->name, TARGETALIAS)) {
+                       /* Only offer TargetAlias when one is configured. */
+                       if (param->value)
+                               SET_PSTATE_NEGOTIATE(param);
+               } else if (!strcmp(param->name, INITIATORALIAS)) {
+                       continue;
+               } else if (!strcmp(param->name, TARGETPORTALGROUPTAG)) {
+                       SET_PSTATE_NEGOTIATE(param);
+               } else if (!strcmp(param->name, INITIALR2T)) {
+                       SET_PSTATE_NEGOTIATE(param);
+               } else if (!strcmp(param->name, IMMEDIATEDATA)) {
+                       SET_PSTATE_NEGOTIATE(param);
+               } else if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH)) {
+                       SET_PSTATE_NEGOTIATE(param);
+               } else if (!strcmp(param->name, MAXBURSTLENGTH)) {
+                       SET_PSTATE_NEGOTIATE(param);
+               } else if (!strcmp(param->name, FIRSTBURSTLENGTH)) {
+                       SET_PSTATE_NEGOTIATE(param);
+               } else if (!strcmp(param->name, DEFAULTTIME2WAIT)) {
+                       SET_PSTATE_NEGOTIATE(param);
+               } else if (!strcmp(param->name, DEFAULTTIME2RETAIN)) {
+                       SET_PSTATE_NEGOTIATE(param);
+               } else if (!strcmp(param->name, MAXOUTSTANDINGR2T)) {
+                       SET_PSTATE_NEGOTIATE(param);
+               } else if (!strcmp(param->name, DATAPDUINORDER)) {
+                       SET_PSTATE_NEGOTIATE(param);
+               } else if (!strcmp(param->name, DATASEQUENCEINORDER)) {
+                       SET_PSTATE_NEGOTIATE(param);
+               } else if (!strcmp(param->name, ERRORRECOVERYLEVEL)) {
+                       SET_PSTATE_NEGOTIATE(param);
+               } else if (!strcmp(param->name, SESSIONTYPE)) {
+                       SET_PSTATE_NEGOTIATE(param);
+               } else if (!strcmp(param->name, IFMARKER)) {
+                       SET_PSTATE_NEGOTIATE(param);
+               } else if (!strcmp(param->name, OFMARKER)) {
+                       SET_PSTATE_NEGOTIATE(param);
+               } else if (!strcmp(param->name, IFMARKINT)) {
+                       SET_PSTATE_NEGOTIATE(param);
+               } else if (!strcmp(param->name, OFMARKINT)) {
+                       SET_PSTATE_NEGOTIATE(param);
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Clear the negotiate flag on every key that RFC 3720 declares
+ * irrelevant for a discovery session, leaving all other parameter
+ * state untouched.  Always returns 0.
+ */
+int iscsi_set_keys_irrelevant_for_discovery(
+       struct iscsi_param_list *param_list)
+{
+       static const char * const discovery_irrelevant_keys[] = {
+               MAXCONNECTIONS, INITIALR2T, IMMEDIATEDATA, MAXBURSTLENGTH,
+               FIRSTBURSTLENGTH, MAXOUTSTANDINGR2T, DATAPDUINORDER,
+               DATASEQUENCEINORDER, ERRORRECOVERYLEVEL, DEFAULTTIME2WAIT,
+               DEFAULTTIME2RETAIN, IFMARKER, OFMARKER, IFMARKINT,
+               OFMARKINT,
+       };
+       struct iscsi_param *param;
+       size_t i;
+
+       list_for_each_entry(param, &param_list->param_list, p_list) {
+               for (i = 0; i < sizeof(discovery_irrelevant_keys) /
+                               sizeof(discovery_irrelevant_keys[0]); i++) {
+                       if (!strcmp(param->name,
+                                       discovery_irrelevant_keys[i])) {
+                               param->state &= ~PSTATE_NEGOTIATE;
+                               break;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Duplicate @src_param_list into *@dst_param_list.
+ *
+ * For a non-leading connection (@leading == 0) session-wide keys are not
+ * copied, with the exception of TargetName, InitiatorName and
+ * TargetPortalGroupTag, which every connection still carries.
+ *
+ * Returns 0 on success with *@dst_param_list set; on any failure all
+ * partially built state is released and -1 is returned.
+ */
+int iscsi_copy_param_list(
+       struct iscsi_param_list **dst_param_list,
+       struct iscsi_param_list *src_param_list,
+       int leading)
+{
+       struct iscsi_param *new_param = NULL, *param = NULL;
+       struct iscsi_param_list *param_list = NULL;
+
+       param_list = kzalloc(sizeof(struct iscsi_param_list), GFP_KERNEL);
+       if (!param_list) {
+               pr_err("Unable to allocate memory for"
+                               " struct iscsi_param_list.\n");
+               /*
+                * Nothing allocated yet; jumping to err_out here would
+                * hand a NULL pointer to iscsi_release_param_list().
+                */
+               return -1;
+       }
+       INIT_LIST_HEAD(&param_list->param_list);
+       INIT_LIST_HEAD(&param_list->extra_response_list);
+
+       list_for_each_entry(param, &src_param_list->param_list, p_list) {
+               /* Skip session-wide keys on non-leading connections. */
+               if (!leading && (param->scope & SCOPE_SESSION_WIDE)) {
+                       if ((strcmp(param->name, "TargetName") != 0) &&
+                           (strcmp(param->name, "InitiatorName") != 0) &&
+                           (strcmp(param->name, "TargetPortalGroupTag") != 0))
+                               continue;
+               }
+
+               new_param = kzalloc(sizeof(struct iscsi_param), GFP_KERNEL);
+               if (!new_param) {
+                       pr_err("Unable to allocate memory for"
+                               " struct iscsi_param.\n");
+                       goto err_out;
+               }
+
+               new_param->set_param = param->set_param;
+               new_param->phase = param->phase;
+               new_param->scope = param->scope;
+               new_param->sender = param->sender;
+               new_param->type = param->type;
+               new_param->use = param->use;
+               new_param->type_range = param->type_range;
+
+               new_param->name = kzalloc(strlen(param->name) + 1, GFP_KERNEL);
+               if (!new_param->name) {
+                       pr_err("Unable to allocate memory for"
+                               " parameter name.\n");
+                       /*
+                        * new_param is not yet on param_list, so
+                        * iscsi_release_param_list() would leak it.
+                        */
+                       kfree(new_param);
+                       goto err_out;
+               }
+
+               new_param->value = kzalloc(strlen(param->value) + 1,
+                               GFP_KERNEL);
+               if (!new_param->value) {
+                       pr_err("Unable to allocate memory for"
+                               " parameter value.\n");
+                       kfree(new_param->name);
+                       kfree(new_param);
+                       goto err_out;
+               }
+
+               memcpy(new_param->name, param->name, strlen(param->name));
+               new_param->name[strlen(param->name)] = '\0';
+               memcpy(new_param->value, param->value, strlen(param->value));
+               new_param->value[strlen(param->value)] = '\0';
+
+               list_add_tail(&new_param->p_list, &param_list->param_list);
+       }
+
+       if (!list_empty(&param_list->param_list))
+               *dst_param_list = param_list;
+       else {
+               pr_err("No parameters allocated.\n");
+               goto err_out;
+       }
+
+       return 0;
+
+err_out:
+       iscsi_release_param_list(param_list);
+       return -1;
+}
+
+/* Free every queued extra (NotUnderstood) response on @param_list. */
+static void iscsi_release_extra_responses(struct iscsi_param_list *param_list)
+{
+       struct iscsi_extra_response *er;
+
+       while (!list_empty(&param_list->extra_response_list)) {
+               er = list_first_entry(&param_list->extra_response_list,
+                               struct iscsi_extra_response, er_list);
+               list_del(&er->er_list);
+               kfree(er);
+       }
+}
+
+/*
+ * Free @param_list, every parameter on it (name and value strings
+ * included) and any queued extra responses.
+ *
+ * Accepts NULL as a no-op (kfree()-style), so error paths such as the
+ * one in iscsi_copy_param_list() may call it unconditionally without
+ * dereferencing a NULL list head.
+ */
+void iscsi_release_param_list(struct iscsi_param_list *param_list)
+{
+       struct iscsi_param *param, *param_tmp;
+
+       if (!param_list)
+               return;
+
+       list_for_each_entry_safe(param, param_tmp, &param_list->param_list,
+                       p_list) {
+               list_del(&param->p_list);
+
+               kfree(param->name);
+               param->name = NULL;
+               kfree(param->value);
+               param->value = NULL;
+               kfree(param);
+               param = NULL;
+       }
+
+       iscsi_release_extra_responses(param_list);
+
+       kfree(param_list);
+}
+
+/*
+ * Look up the parameter named @key in @param_list.  Returns the matching
+ * struct iscsi_param, or NULL (with an error logged) when either
+ * argument is NULL or no parameter of that name exists.
+ */
+struct iscsi_param *iscsi_find_param_from_key(
+       char *key,
+       struct iscsi_param_list *param_list)
+{
+       struct iscsi_param *p;
+
+       if (!key || !param_list) {
+               pr_err("Key or parameter list pointer is NULL.\n");
+               return NULL;
+       }
+
+       list_for_each_entry(p, &param_list->param_list, p_list)
+               if (strcmp(key, p->name) == 0)
+                       return p;
+
+       pr_err("Unable to locate key \"%s\".\n", key);
+       return NULL;
+}
+
+/*
+ * Split a "key=value" text buffer in place: the '=' is overwritten with
+ * a NUL so that *@key points at the start of @textbuf and *@value at the
+ * first character after the former '='.  Note that @textbuf is modified.
+ *
+ * Returns 0 on success, -1 (with an error logged) when no '=' is
+ * present.  Fixes the "seperator" spelling in the log message.
+ */
+int iscsi_extract_key_value(char *textbuf, char **key, char **value)
+{
+       *value = strchr(textbuf, '=');
+       if (!*value) {
+               pr_err("Unable to locate \"=\" separator for key,"
+                               " ignoring request.\n");
+               return -1;
+       }
+
+       *key = textbuf;
+       **value = '\0';
+       *value = *value + 1;
+
+       return 0;
+}
+
+/*
+ * Replace @param's current value with a freshly allocated copy of
+ * @value.  The kzalloc + memcpy + manual NUL of the original was a
+ * hand-rolled kstrdup(); use the real thing.
+ *
+ * Returns 0 on success.  On allocation failure returns -1; the old
+ * value has already been freed and param->value is left NULL.
+ */
+int iscsi_update_param_value(struct iscsi_param *param, char *value)
+{
+       kfree(param->value);
+
+       param->value = kstrdup(value, GFP_KERNEL);
+       if (!param->value) {
+               pr_err("Unable to allocate memory for value.\n");
+               return -1;
+       }
+
+       pr_debug("iSCSI Parameter updated to %s=%s\n",
+                       param->name, param->value);
+       return 0;
+}
+
+/*
+ * Queue a "<key>=NotUnderstood" extra response for an unrecognised key.
+ *
+ * Both the key and the value arrive from the initiator, and both fields
+ * of struct iscsi_extra_response are fixed-size buffers, so BOTH must be
+ * bounds-checked before copying: the original only checked the value,
+ * letting an oversized key overflow extra_response->key via strncpy()
+ * with strlen(key) + 1.
+ *
+ * Returns 0 on success, -1 on protocol error or allocation failure.
+ */
+static int iscsi_add_notunderstood_response(
+       char *key,
+       char *value,
+       struct iscsi_param_list *param_list)
+{
+       struct iscsi_extra_response *extra_response;
+
+       if (strlen(key) > KEY_MAXLEN) {
+               pr_err("Length of notunderstood key \"%s\" exceeds %d,"
+                       " protocol error.\n", key, KEY_MAXLEN);
+               return -1;
+       }
+       if (strlen(value) > VALUE_MAXLEN) {
+               pr_err("Value for notunderstood key \"%s\" exceeds %d,"
+                       " protocol error.\n", key, VALUE_MAXLEN);
+               return -1;
+       }
+
+       extra_response = kzalloc(sizeof(struct iscsi_extra_response), GFP_KERNEL);
+       if (!extra_response) {
+               pr_err("Unable to allocate memory for"
+                       " struct iscsi_extra_response.\n");
+               return -1;
+       }
+       INIT_LIST_HEAD(&extra_response->er_list);
+
+       strncpy(extra_response->key, key, strlen(key) + 1);
+       strncpy(extra_response->value, NOTUNDERSTOOD,
+                       strlen(NOTUNDERSTOOD) + 1);
+
+       list_add_tail(&extra_response->er_list,
+                       &param_list->extra_response_list);
+       return 0;
+}
+
+/*
+ * Return 1 when @key is a CHAP (RFC 1994) or SRP (RFC 2945)
+ * authentication step key, 0 otherwise.
+ */
+static int iscsi_check_for_auth_key(char *key)
+{
+       static const char * const auth_keys[] = {
+               /* CHAP, RFC 1994 */
+               "CHAP_A", "CHAP_I", "CHAP_C", "CHAP_N", "CHAP_R",
+               /* SRP, RFC 2945 */
+               "SRP_U", "SRP_N", "SRP_g", "SRP_s", "SRP_A", "SRP_B",
+               "SRP_M", "SRP_HM",
+       };
+       size_t i;
+
+       for (i = 0; i < sizeof(auth_keys) / sizeof(auth_keys[0]); i++)
+               if (!strcmp(key, auth_keys[i]))
+                       return 1;
+
+       return 0;
+}
+
+/*
+ * When we are the proposing side of a key, decide whether the peer's
+ * reply may be omitted and mark the parameter accordingly.
+ *
+ * A reply carries no information when it cannot change the outcome: a
+ * boolean-AND key proposed as "No" and a boolean-OR key proposed as
+ * "Yes" already determine the result.  Several additional keys are
+ * marked optional purely to tolerate known non-compliant initiators
+ * (gPXE, GlobalSAN) -- see the inline notes.
+ */
+static void iscsi_check_proposer_for_optional_reply(struct iscsi_param *param)
+{
+       if (IS_TYPE_BOOL_AND(param)) {
+               if (!strcmp(param->value, NO))
+                       SET_PSTATE_REPLY_OPTIONAL(param);
+       } else if (IS_TYPE_BOOL_OR(param)) {
+               if (!strcmp(param->value, YES))
+                       SET_PSTATE_REPLY_OPTIONAL(param);
+               /*
+                * Required for gPXE iSCSI boot client
+                */
+               if (!strcmp(param->name, IMMEDIATEDATA))
+                       SET_PSTATE_REPLY_OPTIONAL(param);
+       } else if (IS_TYPE_NUMBER(param)) {
+               if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH))
+                       SET_PSTATE_REPLY_OPTIONAL(param);
+               /*
+                * The GlobalSAN iSCSI Initiator for MacOSX does
+                * not respond to MaxBurstLength, FirstBurstLength,
+                * DefaultTime2Wait or DefaultTime2Retain parameter keys.
+                * So, we set them to 'reply optional' here, and assume
+                * the defaults from iscsi_parameters.h if the initiator
+                * is not RFC compliant and the keys are not negotiated.
+                */
+               if (!strcmp(param->name, MAXBURSTLENGTH))
+                       SET_PSTATE_REPLY_OPTIONAL(param);
+               if (!strcmp(param->name, FIRSTBURSTLENGTH))
+                       SET_PSTATE_REPLY_OPTIONAL(param);
+               if (!strcmp(param->name, DEFAULTTIME2WAIT))
+                       SET_PSTATE_REPLY_OPTIONAL(param);
+               if (!strcmp(param->name, DEFAULTTIME2RETAIN))
+                       SET_PSTATE_REPLY_OPTIONAL(param);
+               /*
+                * Required for gPXE iSCSI boot client
+                */
+               if (!strcmp(param->name, MAXCONNECTIONS))
+                       SET_PSTATE_REPLY_OPTIONAL(param);
+       } else if (IS_PHASE_DECLARATIVE(param))
+               SET_PSTATE_REPLY_OPTIONAL(param);
+}
+
+/*
+ * Validate that @value is exactly "Yes" or "No".  Returns 0 when valid,
+ * -1 (with an error logged) otherwise.
+ */
+static int iscsi_check_boolean_value(struct iscsi_param *param, char *value)
+{
+       if (!strcmp(value, YES) || !strcmp(value, NO))
+               return 0;
+
+       pr_err("Illegal value for \"%s\", must be either"
+               " \"%s\" or \"%s\".\n", param->name, YES, NO);
+       return -1;
+}
+
+/*
+ * Validate that the numerical string @value_ptr lies within the
+ * TYPERANGE_* window declared for @param.  Returns 0 when in range (or
+ * when the parameter declares no recognised range), -1 otherwise.
+ *
+ * NOTE(review): the result of simple_strtoul() is stored in a signed
+ * int, so the "< 0" bounds below only fire on wrap-around of very large
+ * input, and trailing garbage after the digits is not rejected (see the
+ * disabled block) -- confirm whether stricter parsing is wanted.
+ */
+static int iscsi_check_numerical_value(struct iscsi_param *param, char *value_ptr)
+{
+       char *tmpptr;
+       int value = 0;
+
+       value = simple_strtoul(value_ptr, &tmpptr, 0);
+
+/* #warning FIXME: Fix this */
+#if 0
+       if (strspn(endptr, WHITE_SPACE) != strlen(endptr)) {
+               pr_err("Illegal value \"%s\" for \"%s\".\n",
+                       value, param->name);
+               return -1;
+       }
+#endif
+       /* One early-returning range check per TYPERANGE_* class. */
+       if (IS_TYPERANGE_0_TO_2(param)) {
+               if ((value < 0) || (value > 2)) {
+                       pr_err("Illegal value for \"%s\", must be"
+                               " between 0 and 2.\n", param->name);
+                       return -1;
+               }
+               return 0;
+       }
+       if (IS_TYPERANGE_0_TO_3600(param)) {
+               if ((value < 0) || (value > 3600)) {
+                       pr_err("Illegal value for \"%s\", must be"
+                               " between 0 and 3600.\n", param->name);
+                       return -1;
+               }
+               return 0;
+       }
+       if (IS_TYPERANGE_0_TO_32767(param)) {
+               if ((value < 0) || (value > 32767)) {
+                       pr_err("Illegal value for \"%s\", must be"
+                               " between 0 and 32767.\n", param->name);
+                       return -1;
+               }
+               return 0;
+       }
+       if (IS_TYPERANGE_0_TO_65535(param)) {
+               if ((value < 0) || (value > 65535)) {
+                       pr_err("Illegal value for \"%s\", must be"
+                               " between 0 and 65535.\n", param->name);
+                       return -1;
+               }
+               return 0;
+       }
+       if (IS_TYPERANGE_1_TO_65535(param)) {
+               if ((value < 1) || (value > 65535)) {
+                       pr_err("Illegal value for \"%s\", must be"
+                               " between 1 and 65535.\n", param->name);
+                       return -1;
+               }
+               return 0;
+       }
+       if (IS_TYPERANGE_2_TO_3600(param)) {
+               if ((value < 2) || (value > 3600)) {
+                       pr_err("Illegal value for \"%s\", must be"
+                               " between 2 and 3600.\n", param->name);
+                       return -1;
+               }
+               return 0;
+       }
+       if (IS_TYPERANGE_512_TO_16777215(param)) {
+               if ((value < 512) || (value > 16777215)) {
+                       pr_err("Illegal value for \"%s\", must be"
+                               " between 512 and 16777215.\n", param->name);
+                       return -1;
+               }
+               return 0;
+       }
+
+       return 0;
+}
+
+/*
+ * Validate a "low~high" numerical range value, only legal for the
+ * IFMarkInt/OFMarkInt keys, against the locally configured range held
+ * in param->value.
+ *
+ * Both strings are split in place at the '~' and restored before each
+ * return on the happy path.  Each half must individually pass
+ * iscsi_check_numerical_value() and the right value may not be below
+ * the left.  When we originated the key (param->set_param) an
+ * out-of-range value is a hard error; a peer-supplied range below our
+ * minimum is answered with "Reject" instead.  Returns 0 on success or
+ * rejection, -1 on error.
+ */
+static int iscsi_check_numerical_range_value(struct iscsi_param *param, char *value)
+{
+       char *left_val_ptr = NULL, *right_val_ptr = NULL;
+       char *tilde_ptr = NULL, *tmp_ptr = NULL;
+       u32 left_val, right_val, local_left_val, local_right_val;
+
+       if (strcmp(param->name, IFMARKINT) &&
+           strcmp(param->name, OFMARKINT)) {
+               pr_err("Only parameters \"%s\" or \"%s\" may contain a"
+                      " numerical range value.\n", IFMARKINT, OFMARKINT);
+               return -1;
+       }
+
+       if (IS_PSTATE_PROPOSER(param))
+               return 0;
+
+       /* Split the proposed value at '~'; the ',' is restored below. */
+       tilde_ptr = strchr(value, '~');
+       if (!tilde_ptr) {
+               pr_err("Unable to locate numerical range indicator"
+                       " \"~\" for \"%s\".\n", param->name);
+               return -1;
+       }
+       *tilde_ptr = '\0';
+
+       left_val_ptr = value;
+       right_val_ptr = value + strlen(left_val_ptr) + 1;
+
+       if (iscsi_check_numerical_value(param, left_val_ptr) < 0)
+               return -1;
+       if (iscsi_check_numerical_value(param, right_val_ptr) < 0)
+               return -1;
+
+       left_val = simple_strtoul(left_val_ptr, &tmp_ptr, 0);
+       right_val = simple_strtoul(right_val_ptr, &tmp_ptr, 0);
+       *tilde_ptr = '~';
+
+       if (right_val < left_val) {
+               pr_err("Numerical range for parameter \"%s\" contains"
+                       " a right value which is less than the left.\n",
+                               param->name);
+               return -1;
+       }
+
+       /*
+        * For now,  enforce reasonable defaults for [I,O]FMarkInt.
+        */
+       tilde_ptr = strchr(param->value, '~');
+       if (!tilde_ptr) {
+               pr_err("Unable to locate numerical range indicator"
+                       " \"~\" for \"%s\".\n", param->name);
+               return -1;
+       }
+       *tilde_ptr = '\0';
+
+       left_val_ptr = param->value;
+       right_val_ptr = param->value + strlen(left_val_ptr) + 1;
+
+       local_left_val = simple_strtoul(left_val_ptr, &tmp_ptr, 0);
+       local_right_val = simple_strtoul(right_val_ptr, &tmp_ptr, 0);
+       *tilde_ptr = '~';
+
+       if (param->set_param) {
+               /* Locally originated value: out-of-range is an error. */
+               if ((left_val < local_left_val) ||
+                   (right_val < local_left_val)) {
+                       pr_err("Passed value range \"%u~%u\" is below"
+                               " minimum left value \"%u\" for key \"%s\","
+                               " rejecting.\n", left_val, right_val,
+                               local_left_val, param->name);
+                       return -1;
+               }
+       } else {
+               /* Peer-supplied value: answer with Reject, not an error. */
+               if ((left_val < local_left_val) &&
+                   (right_val < local_left_val)) {
+                       pr_err("Received value range \"%u~%u\" is"
+                               " below minimum left value \"%u\" for key"
+                               " \"%s\", rejecting.\n", left_val, right_val,
+                               local_left_val, param->name);
+                       SET_PSTATE_REJECT(param);
+                       if (iscsi_update_param_value(param, REJECT) < 0)
+                               return -1;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Check a string- or list-valued key against the value set its
+ * TYPERANGE_* class permits (authentication methods, digests, or
+ * session types).  Values we proposed ourselves are not re-validated.
+ * Returns 0 when acceptable, -1 otherwise.
+ */
+static int iscsi_check_string_or_list_value(struct iscsi_param *param, char *value)
+{
+       int known;
+
+       if (IS_PSTATE_PROPOSER(param))
+               return 0;
+
+       if (IS_TYPERANGE_AUTH_PARAM(param)) {
+               known = !strcmp(value, KRB5) || !strcmp(value, SPKM1) ||
+                       !strcmp(value, SPKM2) || !strcmp(value, SRP) ||
+                       !strcmp(value, CHAP) || !strcmp(value, NONE);
+               if (!known) {
+                       pr_err("Illegal value for \"%s\", must be"
+                               " \"%s\", \"%s\", \"%s\", \"%s\", \"%s\""
+                               " or \"%s\".\n", param->name, KRB5,
+                                       SPKM1, SPKM2, SRP, CHAP, NONE);
+                       return -1;
+               }
+       }
+       if (IS_TYPERANGE_DIGEST_PARAM(param)) {
+               known = !strcmp(value, CRC32C) || !strcmp(value, NONE);
+               if (!known) {
+                       pr_err("Illegal value for \"%s\", must be"
+                               " \"%s\" or \"%s\".\n", param->name,
+                                       CRC32C, NONE);
+                       return -1;
+               }
+       }
+       if (IS_TYPERANGE_SESSIONTYPE(param)) {
+               known = !strcmp(value, DISCOVERY) || !strcmp(value, NORMAL);
+               if (!known) {
+                       pr_err("Illegal value for \"%s\", must be"
+                               " \"%s\" or \"%s\".\n", param->name,
+                                       DISCOVERY, NORMAL);
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Pick the negotiated value out of two "low~high" range strings:
+ * currently simply the lesser of the two right-hand values.
+ *
+ * Both @value and param->value are split in place at their '~' and are
+ * NOT restored before returning; the returned pointer aliases the
+ * right-hand digits of whichever string held the smaller value.
+ * Returns NULL when either string lacks a '~'.
+ */
+static char *iscsi_get_value_from_number_range(
+       struct iscsi_param *param,
+       char *value)
+{
+       char *end_ptr, *tilde_ptr1 = NULL, *tilde_ptr2 = NULL;
+       u32 acceptor_right_value, proposer_right_value;
+
+       tilde_ptr1 = strchr(value, '~');
+       if (!tilde_ptr1)
+               return NULL;
+       *tilde_ptr1++ = '\0';
+       proposer_right_value = simple_strtoul(tilde_ptr1, &end_ptr, 0);
+
+       tilde_ptr2 = strchr(param->value, '~');
+       if (!tilde_ptr2)
+               return NULL;
+       *tilde_ptr2++ = '\0';
+       acceptor_right_value = simple_strtoul(tilde_ptr2, &end_ptr, 0);
+
+       return (acceptor_right_value >= proposer_right_value) ?
+               tilde_ptr1 : tilde_ptr2;
+}
+
+/*
+ * Intersect the proposer's comma-separated value list @value with the
+ * acceptor's list in param->value, honouring the proposer's order of
+ * preference.
+ *
+ * Both lists are walked by temporarily NUL-terminating at each comma;
+ * every ',' is put back before the function returns.  Returns a pointer
+ * into @value at the first proposed token also present in our list, or
+ * NULL when there is no common value.
+ */
+static char *iscsi_check_valuelist_for_support(
+       struct iscsi_param *param,
+       char *value)
+{
+       char *tmp1 = NULL, *tmp2 = NULL;
+       char *acceptor_values = NULL, *proposer_values = NULL;
+
+       acceptor_values = param->value;
+       proposer_values = value;
+
+       do {
+               if (!proposer_values)
+                       return NULL;
+               /* Isolate the next proposed token at the ','. */
+               tmp1 = strchr(proposer_values, ',');
+               if (tmp1)
+                       *tmp1 = '\0';
+               acceptor_values = param->value;
+               do {
+                       if (!acceptor_values) {
+                               if (tmp1)
+                                       *tmp1 = ',';
+                               return NULL;
+                       }
+                       tmp2 = strchr(acceptor_values, ',');
+                       if (tmp2)
+                               *tmp2 = '\0';
+                       if (!acceptor_values || !proposer_values) {
+                               if (tmp1)
+                                       *tmp1 = ',';
+                               if (tmp2)
+                                       *tmp2 = ',';
+                               return NULL;
+                       }
+                       if (!strcmp(acceptor_values, proposer_values)) {
+                               /* Match: restore the ',' and report it. */
+                               if (tmp2)
+                                       *tmp2 = ',';
+                               goto out;
+                       }
+                       if (tmp2)
+                               *tmp2++ = ',';
+
+                       acceptor_values = tmp2;
+                       if (!acceptor_values)
+                               break;
+               } while (acceptor_values);
+               if (tmp1)
+                       *tmp1++ = ',';
+               proposer_values = tmp1;
+       } while (proposer_values);
+
+out:
+       return proposer_values;
+}
+
+/*
+ * Negotiate the local reply to a key=value pair offered by the remote
+ * peer, dispatching on the key's type:
+ *   - boolean AND: result is YES only when both sides say YES
+ *   - boolean OR:  result is YES when either side says YES
+ *   - number:      keep the smaller (or, for DefaultTime2Wait, larger) value
+ *   - number range / value list: intersect with the locally supported values
+ *   - declarative: accept the proposed value as-is
+ * Returns 0 on success, -1 on protocol error or update failure.
+ */
+static int iscsi_check_acceptor_state(struct iscsi_param *param, char *value)
+{
+       u8 acceptor_boolean_value = 0, proposer_boolean_value = 0;
+       char *negotiated_value = NULL;
+
+       /* A key may only be offered once per negotiation. */
+       if (IS_PSTATE_ACCEPTOR(param)) {
+               pr_err("Received key \"%s\" twice, protocol error.\n",
+                               param->name);
+               return -1;
+       }
+
+       if (IS_PSTATE_REJECT(param))
+               return 0;
+
+       if (IS_TYPE_BOOL_AND(param)) {
+               if (!strcmp(value, YES))
+                       proposer_boolean_value = 1;
+               if (!strcmp(param->value, YES))
+                       acceptor_boolean_value = 1;
+               /*
+                * AND result: when both sides agree on YES the stored value
+                * already holds; otherwise reset to NO.  (Replaces the
+                * original empty do {} while (0) placeholder body with the
+                * inverted condition.)
+                */
+               if (!acceptor_boolean_value || !proposer_boolean_value) {
+                       if (iscsi_update_param_value(param, NO) < 0)
+                               return -1;
+                       if (!proposer_boolean_value)
+                               SET_PSTATE_REPLY_OPTIONAL(param);
+               }
+       } else if (IS_TYPE_BOOL_OR(param)) {
+               if (!strcmp(value, YES))
+                       proposer_boolean_value = 1;
+               if (!strcmp(param->value, YES))
+                       acceptor_boolean_value = 1;
+               /* OR result: YES as soon as either side offered YES. */
+               if (acceptor_boolean_value || proposer_boolean_value) {
+                       if (iscsi_update_param_value(param, YES) < 0)
+                               return -1;
+                       if (proposer_boolean_value)
+                               SET_PSTATE_REPLY_OPTIONAL(param);
+               }
+       } else if (IS_TYPE_NUMBER(param)) {
+               char *tmpptr, buf[10];
+               u32 acceptor_value = simple_strtoul(param->value, &tmpptr, 0);
+               u32 proposer_value = simple_strtoul(value, &tmpptr, 0);
+
+               memset(buf, 0, 10);
+
+               if (!strcmp(param->name, MAXCONNECTIONS) ||
+                   !strcmp(param->name, MAXBURSTLENGTH) ||
+                   !strcmp(param->name, FIRSTBURSTLENGTH) ||
+                   !strcmp(param->name, MAXOUTSTANDINGR2T) ||
+                   !strcmp(param->name, DEFAULTTIME2RETAIN) ||
+                   !strcmp(param->name, ERRORRECOVERYLEVEL)) {
+                       /* These keys negotiate to the minimum of both offers. */
+                       if (proposer_value > acceptor_value) {
+                               sprintf(buf, "%u", acceptor_value);
+                               if (iscsi_update_param_value(param,
+                                               &buf[0]) < 0)
+                                       return -1;
+                       } else {
+                               if (iscsi_update_param_value(param, value) < 0)
+                                       return -1;
+                       }
+               } else if (!strcmp(param->name, DEFAULTTIME2WAIT)) {
+                       /* DefaultTime2Wait negotiates to the maximum. */
+                       if (acceptor_value > proposer_value) {
+                               sprintf(buf, "%u", acceptor_value);
+                               if (iscsi_update_param_value(param,
+                                               &buf[0]) < 0)
+                                       return -1;
+                       } else {
+                               if (iscsi_update_param_value(param, value) < 0)
+                                       return -1;
+                       }
+               } else {
+                       if (iscsi_update_param_value(param, value) < 0)
+                               return -1;
+               }
+
+               if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH))
+                       SET_PSTATE_REPLY_OPTIONAL(param);
+       } else if (IS_TYPE_NUMBER_RANGE(param)) {
+               negotiated_value = iscsi_get_value_from_number_range(
+                                       param, value);
+               if (!negotiated_value)
+                       return -1;
+               if (iscsi_update_param_value(param, negotiated_value) < 0)
+                       return -1;
+       } else if (IS_TYPE_VALUE_LIST(param)) {
+               negotiated_value = iscsi_check_valuelist_for_support(
+                                       param, value);
+               if (!negotiated_value) {
+                       pr_err("Proposer's value list \"%s\" contains"
+                               " no valid values from Acceptor's value list"
+                               " \"%s\".\n", value, param->value);
+                       return -1;
+               }
+               if (iscsi_update_param_value(param, negotiated_value) < 0)
+                       return -1;
+       } else if (IS_PHASE_DECLARATIVE(param)) {
+               if (iscsi_update_param_value(param, value) < 0)
+                       return -1;
+               SET_PSTATE_REPLY_OPTIONAL(param);
+       }
+
+       return 0;
+}
+
+/*
+ * Validate the peer's response to a key that this side proposed.
+ * Range keys must be answered with a single in-range number; list keys
+ * with a single value from our list.  The accepted response is stored
+ * via iscsi_update_param_value().  Returns 0 on success, -1 on error.
+ */
+static int iscsi_check_proposer_state(struct iscsi_param *param, char *value)
+{
+       /* A proposed key may only be answered once. */
+       if (IS_PSTATE_RESPONSE_GOT(param)) {
+               pr_err("Received key \"%s\" twice, protocol error.\n",
+                               param->name);
+               return -1;
+       }
+
+       if (IS_TYPE_NUMBER_RANGE(param)) {
+               u32 left_val = 0, right_val = 0, recieved_value = 0;
+               char *left_val_ptr = NULL, *right_val_ptr = NULL;
+               char *tilde_ptr = NULL, *tmp_ptr = NULL;
+
+               /* "Irrelevant" and "Reject" are always legal answers. */
+               if (!strcmp(value, IRRELEVANT) || !strcmp(value, REJECT)) {
+                       if (iscsi_update_param_value(param, value) < 0)
+                               return -1;
+                       return 0;
+               }
+
+               /* The response must be a single number, not a range. */
+               tilde_ptr = strchr(value, '~');
+               if (tilde_ptr) {
+                       pr_err("Illegal \"~\" in response for \"%s\".\n",
+                                       param->name);
+                       return -1;
+               }
+               tilde_ptr = strchr(param->value, '~');
+               if (!tilde_ptr) {
+                       pr_err("Unable to locate numerical range"
+                               " indicator \"~\" for \"%s\".\n", param->name);
+                       return -1;
+               }
+               /*
+                * Temporarily split our stored "low~high" string at the '~'
+                * so that both bounds parse as separate strings; the '~' is
+                * restored below, leaving param->value intact.
+                */
+               *tilde_ptr = '\0';
+
+               left_val_ptr = param->value;
+               right_val_ptr = param->value + strlen(left_val_ptr) + 1;
+               left_val = simple_strtoul(left_val_ptr, &tmp_ptr, 0);
+               right_val = simple_strtoul(right_val_ptr, &tmp_ptr, 0);
+               recieved_value = simple_strtoul(value, &tmp_ptr, 0);
+
+               *tilde_ptr = '~';
+
+               if ((recieved_value < left_val) ||
+                   (recieved_value > right_val)) {
+                       pr_err("Illegal response \"%s=%u\", value must"
+                               " be between %u and %u.\n", param->name,
+                               recieved_value, left_val, right_val);
+                       return -1;
+               }
+       } else if (IS_TYPE_VALUE_LIST(param)) {
+               char *comma_ptr = NULL, *tmp_ptr = NULL;
+
+               /* The response must be a single value, not a list. */
+               comma_ptr = strchr(value, ',');
+               if (comma_ptr) {
+                       pr_err("Illegal \",\" in response for \"%s\".\n",
+                                       param->name);
+                       return -1;
+               }
+
+               tmp_ptr = iscsi_check_valuelist_for_support(param, value);
+               if (!tmp_ptr)
+                       return -1;
+       }
+
+       if (iscsi_update_param_value(param, value) < 0)
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Syntactically validate a received value (or comma-separated value
+ * list) for @param, handling the special Reject / Irrelevant /
+ * NotUnderstood answers first.  Returns 0 on success, -1 on protocol
+ * error.
+ */
+static int iscsi_check_value(struct iscsi_param *param, char *value)
+{
+       char *comma_ptr = NULL;
+
+       if (!strcmp(value, REJECT)) {
+               if (!strcmp(param->name, IFMARKINT) ||
+                   !strcmp(param->name, OFMARKINT)) {
+                       /*
+                        * Reject is not fatal for [I,O]FMarkInt,  and causes
+                        * [I,O]FMarker to be reset to No. (See iSCSI v20 A.3.2)
+                        */
+                       SET_PSTATE_REJECT(param);
+                       return 0;
+               }
+               pr_err("Received %s=%s\n", param->name, value);
+               return -1;
+       }
+       if (!strcmp(value, IRRELEVANT)) {
+               pr_debug("Received %s=%s\n", param->name, value);
+               SET_PSTATE_IRRELEVANT(param);
+               return 0;
+       }
+       if (!strcmp(value, NOTUNDERSTOOD)) {
+               /* Only a response to our own proposal may say NotUnderstood. */
+               if (!IS_PSTATE_PROPOSER(param)) {
+                       pr_err("Received illegal offer %s=%s\n",
+                               param->name, value);
+                       return -1;
+               }
+
+/* #warning FIXME: Add check for X-ExtensionKey here */
+               pr_err("Standard iSCSI key \"%s\" cannot be answered"
+                       " with \"%s\", protocol error.\n", param->name, value);
+               return -1;
+       }
+
+       /*
+        * Walk the (possibly comma-separated) values, temporarily
+        * NUL-terminating each element for the per-type checks and
+        * restoring the ',' afterwards.
+        */
+       do {
+               comma_ptr = NULL;
+               comma_ptr = strchr(value, ',');
+
+               if (comma_ptr && !IS_TYPE_VALUE_LIST(param)) {
+                       pr_err("Detected value seperator \",\", but"
+                               " key \"%s\" does not allow a value list,"
+                               " protocol error.\n", param->name);
+                       return -1;
+               }
+               if (comma_ptr)
+                       *comma_ptr = '\0';
+
+               if (strlen(value) > VALUE_MAXLEN) {
+                       pr_err("Value for key \"%s\" exceeds %d,"
+                               " protocol error.\n", param->name,
+                               VALUE_MAXLEN);
+                       return -1;
+               }
+
+               if (IS_TYPE_BOOL_AND(param) || IS_TYPE_BOOL_OR(param)) {
+                       if (iscsi_check_boolean_value(param, value) < 0)
+                               return -1;
+               } else if (IS_TYPE_NUMBER(param)) {
+                       if (iscsi_check_numerical_value(param, value) < 0)
+                               return -1;
+               } else if (IS_TYPE_NUMBER_RANGE(param)) {
+                       if (iscsi_check_numerical_range_value(param, value) < 0)
+                               return -1;
+               } else if (IS_TYPE_STRING(param) || IS_TYPE_VALUE_LIST(param)) {
+                       if (iscsi_check_string_or_list_value(param, value) < 0)
+                               return -1;
+               } else {
+                       pr_err("Huh? 0x%02x\n", param->type);
+                       return -1;
+               }
+
+               if (comma_ptr)
+                       *comma_ptr++ = ',';
+
+               value = comma_ptr;
+       } while (value);
+
+       return 0;
+}
+
+/*
+ * Look up @key and verify that @sender is permitted to send it.
+ * Unlike iscsi_check_key(), no duplicate-key or negotiation-phase
+ * checks are performed.  Returns the parameter, or NULL on error.
+ */
+static struct iscsi_param *__iscsi_check_key(
+       char *key,
+       int sender,
+       struct iscsi_param_list *param_list)
+{
+       struct iscsi_param *param;
+
+       /* Key name length must not exceed 63 bytes. (See iSCSI v20 5.1) */
+       if (strlen(key) > KEY_MAXLEN) {
+               pr_err("Length of key name \"%s\" exceeds %d.\n",
+                       key, KEY_MAXLEN);
+               return NULL;
+       }
+
+       param = iscsi_find_param_from_key(key, param_list);
+       if (!param)
+               return NULL;
+
+       if ((sender & SENDER_INITIATOR) && !IS_SENDER_INITIATOR(param)) {
+               pr_err("Key \"%s\" may not be sent to %s,"
+                       " protocol error.\n", param->name,
+                       (sender & SENDER_RECEIVER) ? "target" : "initiator");
+               return NULL;
+       }
+
+       if ((sender & SENDER_TARGET) && !IS_SENDER_TARGET(param)) {
+               pr_err("Key \"%s\" may not be sent to %s,"
+                       " protocol error.\n", param->name,
+                       (sender & SENDER_RECEIVER) ? "initiator" : "target");
+               return NULL;
+       }
+
+       return param;
+}
+
+/*
+ * Look up a received key by name and validate protocol constraints:
+ * key length, that @sender may send it, that it has not already been
+ * received, and that it may be negotiated in @phase (a zero @phase
+ * skips the phase check).  Returns the matching parameter, or NULL on
+ * protocol error.
+ */
+static struct iscsi_param *iscsi_check_key(
+       char *key,
+       int phase,
+       int sender,
+       struct iscsi_param_list *param_list)
+{
+       struct iscsi_param *param;
+       /*
+        * Key name length must not exceed 63 bytes. (See iSCSI v20 5.1)
+        */
+       if (strlen(key) > KEY_MAXLEN) {
+               pr_err("Length of key name \"%s\" exceeds %d.\n",
+                       key, KEY_MAXLEN);
+               return NULL;
+       }
+
+       param = iscsi_find_param_from_key(key, param_list);
+       if (!param)
+               return NULL;
+
+       if ((sender & SENDER_INITIATOR) && !IS_SENDER_INITIATOR(param)) {
+               pr_err("Key \"%s\" may not be sent to %s,"
+                       " protocol error.\n", param->name,
+                       (sender & SENDER_RECEIVER) ? "target" : "initiator");
+               return NULL;
+       }
+       if ((sender & SENDER_TARGET) && !IS_SENDER_TARGET(param)) {
+               pr_err("Key \"%s\" may not be sent to %s,"
+                               " protocol error.\n", param->name,
+                       (sender & SENDER_RECEIVER) ? "initiator" : "target");
+               return NULL;
+       }
+
+       if (IS_PSTATE_ACCEPTOR(param)) {
+               pr_err("Key \"%s\" received twice, protocol error.\n",
+                               key);
+               return NULL;
+       }
+
+       if (!phase)
+               return param;
+
+       if (!(param->phase & phase)) {
+               pr_err("Key \"%s\" may not be negotiated during ",
+                               param->name);
+               switch (phase) {
+               case PHASE_SECURITY:
+                       pr_debug("Security phase.\n");
+                       break;
+               case PHASE_OPERATIONAL:
+                       pr_debug("Operational phase.\n");
+                       /* Fix: was falling through and also printing
+                        * "Unknown phase." for operational-phase errors. */
+                       break;
+               default:
+                       pr_debug("Unknown phase.\n");
+               }
+               return NULL;
+       }
+
+       return param;
+}
+
+/*
+ * Enforce iSCSI cross-key integrity rules for @phase before encoding
+ * outgoing keys: a first pass collects the currently negotiated values,
+ * a second pass resets dependent keys (e.g. MaxOutstandingR2T=1 when
+ * DataSequenceInOrder with ERL>0, MaxConnections=1 for discovery
+ * sessions, FirstBurstLength clamped to MaxBurstLength, and the
+ * [I,O]FMarker/[I,O]FMarkInt coupling).  Returns 0 on success, -1 on
+ * update failure.
+ */
+static int iscsi_enforce_integrity_rules(
+       u8 phase,
+       struct iscsi_param_list *param_list)
+{
+       char *tmpptr;
+       u8 DataSequenceInOrder = 0;
+       u8 ErrorRecoveryLevel = 0, SessionType = 0;
+       u8 IFMarker = 0, OFMarker = 0;
+       u8 IFMarkInt_Reject = 0, OFMarkInt_Reject = 0;
+       u32 FirstBurstLength = 0, MaxBurstLength = 0;
+       struct iscsi_param *param = NULL;
+
+       /* Pass 1: snapshot the values the rules below depend on. */
+       list_for_each_entry(param, &param_list->param_list, p_list) {
+               if (!(param->phase & phase))
+                       continue;
+               if (!strcmp(param->name, SESSIONTYPE))
+                       if (!strcmp(param->value, NORMAL))
+                               SessionType = 1;
+               if (!strcmp(param->name, ERRORRECOVERYLEVEL))
+                       ErrorRecoveryLevel = simple_strtoul(param->value,
+                                       &tmpptr, 0);
+               if (!strcmp(param->name, DATASEQUENCEINORDER))
+                       if (!strcmp(param->value, YES))
+                               DataSequenceInOrder = 1;
+               if (!strcmp(param->name, MAXBURSTLENGTH))
+                       MaxBurstLength = simple_strtoul(param->value,
+                                       &tmpptr, 0);
+               if (!strcmp(param->name, IFMARKER))
+                       if (!strcmp(param->value, YES))
+                               IFMarker = 1;
+               if (!strcmp(param->name, OFMARKER))
+                       if (!strcmp(param->value, YES))
+                               OFMarker = 1;
+               if (!strcmp(param->name, IFMARKINT))
+                       if (!strcmp(param->value, REJECT))
+                               IFMarkInt_Reject = 1;
+               if (!strcmp(param->name, OFMARKINT))
+                       if (!strcmp(param->value, REJECT))
+                               OFMarkInt_Reject = 1;
+       }
+
+       /* Pass 2: apply the resets implied by the collected values. */
+       list_for_each_entry(param, &param_list->param_list, p_list) {
+               if (!(param->phase & phase))
+                       continue;
+               /* For discovery sessions only marker keys and accepted
+                * keys are subject to the rules below. */
+               if (!SessionType && (!IS_PSTATE_ACCEPTOR(param) &&
+                    (strcmp(param->name, IFMARKER) &&
+                     strcmp(param->name, OFMARKER) &&
+                     strcmp(param->name, IFMARKINT) &&
+                     strcmp(param->name, OFMARKINT))))
+                       continue;
+               if (!strcmp(param->name, MAXOUTSTANDINGR2T) &&
+                   DataSequenceInOrder && (ErrorRecoveryLevel > 0)) {
+                       if (strcmp(param->value, "1")) {
+                               if (iscsi_update_param_value(param, "1") < 0)
+                                       return -1;
+                               pr_debug("Reset \"%s\" to \"%s\".\n",
+                                       param->name, param->value);
+                       }
+               }
+               if (!strcmp(param->name, MAXCONNECTIONS) && !SessionType) {
+                       if (strcmp(param->value, "1")) {
+                               if (iscsi_update_param_value(param, "1") < 0)
+                                       return -1;
+                               pr_debug("Reset \"%s\" to \"%s\".\n",
+                                       param->name, param->value);
+                       }
+               }
+               if (!strcmp(param->name, FIRSTBURSTLENGTH)) {
+                       FirstBurstLength = simple_strtoul(param->value,
+                                       &tmpptr, 0);
+                       /* FirstBurstLength must not exceed MaxBurstLength. */
+                       if (FirstBurstLength > MaxBurstLength) {
+                               char tmpbuf[10];
+                               memset(tmpbuf, 0, 10);
+                               sprintf(tmpbuf, "%u", MaxBurstLength);
+                               if (iscsi_update_param_value(param, tmpbuf))
+                                       return -1;
+                               pr_debug("Reset \"%s\" to \"%s\".\n",
+                                       param->name, param->value);
+                       }
+               }
+               if (!strcmp(param->name, IFMARKER) && IFMarkInt_Reject) {
+                       if (iscsi_update_param_value(param, NO) < 0)
+                               return -1;
+                       IFMarker = 0;
+                       pr_debug("Reset \"%s\" to \"%s\".\n",
+                                       param->name, param->value);
+               }
+               if (!strcmp(param->name, OFMARKER) && OFMarkInt_Reject) {
+                       if (iscsi_update_param_value(param, NO) < 0)
+                               return -1;
+                       OFMarker = 0;
+                       pr_debug("Reset \"%s\" to \"%s\".\n",
+                                        param->name, param->value);
+               }
+               if (!strcmp(param->name, IFMARKINT) && !IFMarker) {
+                       if (!strcmp(param->value, REJECT))
+                               continue;
+                       param->state &= ~PSTATE_NEGOTIATE;
+                       if (iscsi_update_param_value(param, IRRELEVANT) < 0)
+                               return -1;
+                       pr_debug("Reset \"%s\" to \"%s\".\n",
+                                       param->name, param->value);
+               }
+               if (!strcmp(param->name, OFMARKINT) && !OFMarker) {
+                       if (!strcmp(param->value, REJECT))
+                               continue;
+                       param->state &= ~PSTATE_NEGOTIATE;
+                       if (iscsi_update_param_value(param, IRRELEVANT) < 0)
+                               return -1;
+                       pr_debug("Reset \"%s\" to \"%s\".\n",
+                                       param->name, param->value);
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Parse a login/text buffer of NUL-separated "key=value" pairs and run
+ * each through key/value validation and the proposer- or acceptor-side
+ * negotiation state machine.
+ * Returns 0 on success, 1 when an authentication key is detected during
+ * the Security phase (the caller hands the buffer to the auth code),
+ * or -1 on error.  (Centralizes the previously six duplicated
+ * kfree(tmpbuf) exit paths into one goto-based cleanup.)
+ */
+int iscsi_decode_text_input(
+       u8 phase,
+       u8 sender,
+       char *textbuf,
+       u32 length,
+       struct iscsi_param_list *param_list)
+{
+       char *tmpbuf, *start = NULL, *end = NULL;
+       int rc = -1;
+
+       /*
+        * Work on a NUL-terminated private copy: textbuf need not be
+        * terminated, and the parser writes into the buffer in place.
+        */
+       tmpbuf = kzalloc(length + 1, GFP_KERNEL);
+       if (!tmpbuf) {
+               pr_err("Unable to allocate memory for tmpbuf.\n");
+               return -1;
+       }
+
+       memcpy(tmpbuf, textbuf, length);
+       tmpbuf[length] = '\0';
+       start = tmpbuf;
+       end = (start + length);
+
+       while (start < end) {
+               char *key, *value;
+               struct iscsi_param *param;
+
+               if (iscsi_extract_key_value(start, &key, &value) < 0)
+                       goto free_buffer;
+
+               pr_debug("Got key: %s=%s\n", key, value);
+
+               if (phase & PHASE_SECURITY) {
+                       if (iscsi_check_for_auth_key(key) > 0) {
+                               /* Restore the '=' that
+                                * iscsi_extract_key_value() overwrote
+                                * before handing off to the auth code. */
+                               char *tmpptr = key + strlen(key);
+                               *tmpptr = '=';
+                               rc = 1;
+                               goto free_buffer;
+                       }
+               }
+
+               param = iscsi_check_key(key, phase, sender, param_list);
+               if (!param) {
+                       /* Unknown key: queue a NotUnderstood response. */
+                       if (iscsi_add_notunderstood_response(key,
+                                       value, param_list) < 0)
+                               goto free_buffer;
+
+                       start += strlen(key) + strlen(value) + 2;
+                       continue;
+               }
+               if (iscsi_check_value(param, value) < 0)
+                       goto free_buffer;
+
+               start += strlen(key) + strlen(value) + 2;
+
+               if (IS_PSTATE_PROPOSER(param)) {
+                       if (iscsi_check_proposer_state(param, value) < 0)
+                               goto free_buffer;
+
+                       SET_PSTATE_RESPONSE_GOT(param);
+               } else {
+                       if (iscsi_check_acceptor_state(param, value) < 0)
+                               goto free_buffer;
+
+                       SET_PSTATE_ACCEPTOR(param);
+               }
+       }
+
+       rc = 0;
+free_buffer:
+       kfree(tmpbuf);
+       return rc;
+}
+
+/*
+ * Append outgoing "key=value" pairs for @phase to @textbuf starting at
+ * offset *@length, advancing *@length past each NUL-terminated pair:
+ * first replies for keys accepted from the peer, then keys this side
+ * proposes, then any queued extra responses (e.g. from
+ * iscsi_add_notunderstood_response()).  Returns 0 on success, -1 if
+ * integrity-rule enforcement fails.
+ */
+int iscsi_encode_text_output(
+       u8 phase,
+       u8 sender,
+       char *textbuf,
+       u32 *length,
+       struct iscsi_param_list *param_list)
+{
+       char *output_buf = NULL;
+       struct iscsi_extra_response *er;
+       struct iscsi_param *param;
+
+       output_buf = textbuf + *length;
+
+       if (iscsi_enforce_integrity_rules(phase, param_list) < 0)
+               return -1;
+
+       list_for_each_entry(param, &param_list->param_list, p_list) {
+               if (!(param->sender & sender))
+                       continue;
+               /* Reply to a key the peer offered and we accepted. */
+               if (IS_PSTATE_ACCEPTOR(param) &&
+                   !IS_PSTATE_RESPONSE_SENT(param) &&
+                   !IS_PSTATE_REPLY_OPTIONAL(param) &&
+                   (param->phase & phase)) {
+                       *length += sprintf(output_buf, "%s=%s",
+                               param->name, param->value);
+                       *length += 1;   /* keep sprintf()'s NUL as separator */
+                       output_buf = textbuf + *length;
+                       SET_PSTATE_RESPONSE_SENT(param);
+                       pr_debug("Sending key: %s=%s\n",
+                               param->name, param->value);
+                       continue;
+               }
+               /* Propose a key that has not been negotiated yet. */
+               if (IS_PSTATE_NEGOTIATE(param) &&
+                   !IS_PSTATE_ACCEPTOR(param) &&
+                   !IS_PSTATE_PROPOSER(param) &&
+                   (param->phase & phase)) {
+                       *length += sprintf(output_buf, "%s=%s",
+                               param->name, param->value);
+                       *length += 1;   /* keep sprintf()'s NUL as separator */
+                       output_buf = textbuf + *length;
+                       SET_PSTATE_PROPOSER(param);
+                       iscsi_check_proposer_for_optional_reply(param);
+                       pr_debug("Sending key: %s=%s\n",
+                               param->name, param->value);
+               }
+       }
+
+       list_for_each_entry(er, &param_list->extra_response_list, er_list) {
+               *length += sprintf(output_buf, "%s=%s", er->key, er->value);
+               *length += 1;   /* keep sprintf()'s NUL as separator */
+               output_buf = textbuf + *length;
+               pr_debug("Sending key: %s=%s\n", er->key, er->value);
+       }
+       iscsi_release_extra_responses(param_list);
+
+       return 0;
+}
+
+/*
+ * Verify that every key this side proposed has received a response
+ * from the peer.  Returns 0 when negotiation is complete, -1 if any
+ * mandatory response is still missing.
+ */
+int iscsi_check_negotiated_keys(struct iscsi_param_list *param_list)
+{
+       struct iscsi_param *p;
+       int rc = 0;
+
+       list_for_each_entry(p, &param_list->param_list, p_list) {
+               int awaiting_reply = IS_PSTATE_NEGOTIATE(p) &&
+                                    IS_PSTATE_PROPOSER(p) &&
+                                    !IS_PSTATE_RESPONSE_GOT(p) &&
+                                    !IS_PSTATE_REPLY_OPTIONAL(p) &&
+                                    !IS_PHASE_DECLARATIVE(p);
+
+               if (!awaiting_reply)
+                       continue;
+
+               pr_err("No response for proposed key \"%s\".\n",
+                               p->name);
+               rc = -1;
+       }
+
+       return rc;
+}
+
+/*
+ * Apply a single "key=value" update to @param_list.  When @check_key
+ * is non-zero, full key checks plus value validation run (with
+ * param->set_param held across the check); otherwise only the basic
+ * __iscsi_check_key() lookup rules apply.  Returns 0 on success, -1
+ * on failure.
+ */
+int iscsi_change_param_value(
+       char *keyvalue,
+       struct iscsi_param_list *param_list,
+       int check_key)
+{
+       char *key = NULL, *value = NULL;
+       struct iscsi_param *param;
+       int sender = 0;
+
+       if (iscsi_extract_key_value(keyvalue, &key, &value) < 0)
+               return -1;
+
+       /*
+        * NOTE(review): keyvalue (not key) is passed below; this relies
+        * on iscsi_extract_key_value() splitting the buffer in place so
+        * keyvalue reads as just the key -- confirm against its
+        * implementation.
+        */
+       if (!check_key) {
+               param = __iscsi_check_key(keyvalue, sender, param_list);
+               if (!param)
+                       return -1;
+       } else {
+               param = iscsi_check_key(keyvalue, 0, sender, param_list);
+               if (!param)
+                       return -1;
+
+               param->set_param = 1;
+               if (iscsi_check_value(param, value) < 0) {
+                       param->set_param = 0;
+                       return -1;
+               }
+               param->set_param = 0;
+       }
+
+       if (iscsi_update_param_value(param, value) < 0)
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Copy the negotiated (accepted or proposed) per-connection keys from
+ * @param_list into the connection's operational values in @ops.
+ */
+void iscsi_set_connection_parameters(
+       struct iscsi_conn_ops *ops,
+       struct iscsi_param_list *param_list)
+{
+       char *tmpptr;
+       struct iscsi_param *param;
+
+       pr_debug("---------------------------------------------------"
+                       "---------------\n");
+       list_for_each_entry(param, &param_list->param_list, p_list) {
+               /* Only keys that actually took part in negotiation apply. */
+               if (!IS_PSTATE_ACCEPTOR(param) && !IS_PSTATE_PROPOSER(param))
+                       continue;
+               if (!strcmp(param->name, AUTHMETHOD)) {
+                       pr_debug("AuthMethod:                   %s\n",
+                               param->value);
+               } else if (!strcmp(param->name, HEADERDIGEST)) {
+                       ops->HeaderDigest = !strcmp(param->value, CRC32C);
+                       pr_debug("HeaderDigest:                 %s\n",
+                               param->value);
+               } else if (!strcmp(param->name, DATADIGEST)) {
+                       ops->DataDigest = !strcmp(param->value, CRC32C);
+                       pr_debug("DataDigest:                   %s\n",
+                               param->value);
+               } else if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH)) {
+                       ops->MaxRecvDataSegmentLength =
+                               simple_strtoul(param->value, &tmpptr, 0);
+                       pr_debug("MaxRecvDataSegmentLength:     %s\n",
+                               param->value);
+               } else if (!strcmp(param->name, OFMARKER)) {
+                       ops->OFMarker = !strcmp(param->value, YES);
+                       pr_debug("OFMarker:                     %s\n",
+                               param->value);
+               } else if (!strcmp(param->name, IFMARKER)) {
+                       ops->IFMarker = !strcmp(param->value, YES);
+                       pr_debug("IFMarker:                     %s\n",
+                               param->value);
+               } else if (!strcmp(param->name, OFMARKINT)) {
+                       ops->OFMarkInt =
+                               simple_strtoul(param->value, &tmpptr, 0);
+                       pr_debug("OFMarkInt:                    %s\n",
+                               param->value);
+               } else if (!strcmp(param->name, IFMARKINT)) {
+                       ops->IFMarkInt =
+                               simple_strtoul(param->value, &tmpptr, 0);
+                       pr_debug("IFMarkInt:                    %s\n",
+                               param->value);
+               }
+       }
+       pr_debug("----------------------------------------------------"
+                       "--------------\n");
+}
+
+/*
+ * Copy the negotiated (accepted or proposed) per-session keys from
+ * @param_list into the session's operational values in @ops.
+ * InitiatorName and TargetName are only stored for the @leading
+ * connection of the session.
+ */
+void iscsi_set_session_parameters(
+       struct iscsi_sess_ops *ops,
+       struct iscsi_param_list *param_list,
+       int leading)
+{
+       char *tmpptr;
+       struct iscsi_param *param;
+
+       pr_debug("----------------------------------------------------"
+                       "--------------\n");
+       list_for_each_entry(param, &param_list->param_list, p_list) {
+               /* Only keys that actually took part in negotiation apply. */
+               if (!IS_PSTATE_ACCEPTOR(param) && !IS_PSTATE_PROPOSER(param))
+                       continue;
+               if (!strcmp(param->name, INITIATORNAME)) {
+                       if (!param->value)
+                               continue;
+                       if (leading)
+                               snprintf(ops->InitiatorName,
+                                               sizeof(ops->InitiatorName),
+                                               "%s", param->value);
+                       pr_debug("InitiatorName:                %s\n",
+                               param->value);
+               } else if (!strcmp(param->name, INITIATORALIAS)) {
+                       if (!param->value)
+                               continue;
+                       snprintf(ops->InitiatorAlias,
+                                               sizeof(ops->InitiatorAlias),
+                                               "%s", param->value);
+                       pr_debug("InitiatorAlias:               %s\n",
+                               param->value);
+               } else if (!strcmp(param->name, TARGETNAME)) {
+                       if (!param->value)
+                               continue;
+                       if (leading)
+                               snprintf(ops->TargetName,
+                                               sizeof(ops->TargetName),
+                                               "%s", param->value);
+                       pr_debug("TargetName:                   %s\n",
+                               param->value);
+               } else if (!strcmp(param->name, TARGETALIAS)) {
+                       if (!param->value)
+                               continue;
+                       snprintf(ops->TargetAlias, sizeof(ops->TargetAlias),
+                                       "%s", param->value);
+                       pr_debug("TargetAlias:                  %s\n",
+                               param->value);
+               } else if (!strcmp(param->name, TARGETPORTALGROUPTAG)) {
+                       ops->TargetPortalGroupTag =
+                               simple_strtoul(param->value, &tmpptr, 0);
+                       pr_debug("TargetPortalGroupTag:         %s\n",
+                               param->value);
+               } else if (!strcmp(param->name, MAXCONNECTIONS)) {
+                       ops->MaxConnections =
+                               simple_strtoul(param->value, &tmpptr, 0);
+                       pr_debug("MaxConnections:               %s\n",
+                               param->value);
+               } else if (!strcmp(param->name, INITIALR2T)) {
+                       ops->InitialR2T = !strcmp(param->value, YES);
+                        pr_debug("InitialR2T:                   %s\n",
+                               param->value);
+               } else if (!strcmp(param->name, IMMEDIATEDATA)) {
+                       ops->ImmediateData = !strcmp(param->value, YES);
+                       pr_debug("ImmediateData:                %s\n",
+                               param->value);
+               } else if (!strcmp(param->name, MAXBURSTLENGTH)) {
+                       ops->MaxBurstLength =
+                               simple_strtoul(param->value, &tmpptr, 0);
+                       pr_debug("MaxBurstLength:               %s\n",
+                               param->value);
+               } else if (!strcmp(param->name, FIRSTBURSTLENGTH)) {
+                       ops->FirstBurstLength =
+                               simple_strtoul(param->value, &tmpptr, 0);
+                       pr_debug("FirstBurstLength:             %s\n",
+                               param->value);
+               } else if (!strcmp(param->name, DEFAULTTIME2WAIT)) {
+                       ops->DefaultTime2Wait =
+                               simple_strtoul(param->value, &tmpptr, 0);
+                       pr_debug("DefaultTime2Wait:             %s\n",
+                               param->value);
+               } else if (!strcmp(param->name, DEFAULTTIME2RETAIN)) {
+                       ops->DefaultTime2Retain =
+                               simple_strtoul(param->value, &tmpptr, 0);
+                       pr_debug("DefaultTime2Retain:           %s\n",
+                               param->value);
+               } else if (!strcmp(param->name, MAXOUTSTANDINGR2T)) {
+                       ops->MaxOutstandingR2T =
+                               simple_strtoul(param->value, &tmpptr, 0);
+                       pr_debug("MaxOutstandingR2T:            %s\n",
+                               param->value);
+               } else if (!strcmp(param->name, DATAPDUINORDER)) {
+                       ops->DataPDUInOrder = !strcmp(param->value, YES);
+                       pr_debug("DataPDUInOrder:               %s\n",
+                               param->value);
+               } else if (!strcmp(param->name, DATASEQUENCEINORDER)) {
+                       ops->DataSequenceInOrder = !strcmp(param->value, YES);
+                       pr_debug("DataSequenceInOrder:          %s\n",
+                               param->value);
+               } else if (!strcmp(param->name, ERRORRECOVERYLEVEL)) {
+                       ops->ErrorRecoveryLevel =
+                               simple_strtoul(param->value, &tmpptr, 0);
+                       pr_debug("ErrorRecoveryLevel:           %s\n",
+                               param->value);
+               } else if (!strcmp(param->name, SESSIONTYPE)) {
+                       ops->SessionType = !strcmp(param->value, DISCOVERY);
+                       pr_debug("SessionType:                  %s\n",
+                               param->value);
+               }
+       }
+       pr_debug("----------------------------------------------------"
+                       "--------------\n");
+
+}
diff --git a/drivers/target/iscsi/iscsi_target_parameters.h b/drivers/target/iscsi/iscsi_target_parameters.h
new file mode 100644 (file)
index 0000000..6a37fd6
--- /dev/null
@@ -0,0 +1,269 @@
+#ifndef ISCSI_PARAMETERS_H
+#define ISCSI_PARAMETERS_H
+
+/*
+ * Key-string constants, negotiation-state flags and prototypes for iSCSI
+ * (RFC 3720) text parameter negotiation in the iSCSI target.
+ */
+
+/* One key received during login that the target did not understand. */
+struct iscsi_extra_response {
+       char key[64];
+       char value[32];
+       struct list_head er_list;
+} ____cacheline_aligned;
+
+/* One negotiable login parameter plus its per-negotiation state. */
+struct iscsi_param {
+       char *name;
+       char *value;
+       u8 set_param;
+       u8 phase;               /* PHASE_* flags below */
+       u8 scope;               /* SCOPE_* flags below */
+       u8 sender;              /* SENDER_* flags below */
+       u8 type;                /* TYPE_* flags below */
+       u8 use;                 /* USE_* flags below */
+       u16 type_range;         /* TYPERANGE_* flags below */
+       u32 state;              /* PSTATE_* flags below */
+       struct list_head p_list;
+} ____cacheline_aligned;
+
+extern int iscsi_login_rx_data(struct iscsi_conn *, char *, int);
+extern int iscsi_login_tx_data(struct iscsi_conn *, char *, char *, int);
+extern void iscsi_dump_conn_ops(struct iscsi_conn_ops *);
+extern void iscsi_dump_sess_ops(struct iscsi_sess_ops *);
+extern void iscsi_print_params(struct iscsi_param_list *);
+extern int iscsi_create_default_params(struct iscsi_param_list **);
+extern int iscsi_set_keys_to_negotiate(int, struct iscsi_param_list *);
+extern int iscsi_set_keys_irrelevant_for_discovery(struct iscsi_param_list *);
+extern int iscsi_copy_param_list(struct iscsi_param_list **,
+                       struct iscsi_param_list *, int);
+extern int iscsi_change_param_value(char *, struct iscsi_param_list *, int);
+extern void iscsi_release_param_list(struct iscsi_param_list *);
+extern struct iscsi_param *iscsi_find_param_from_key(char *, struct iscsi_param_list *);
+extern int iscsi_extract_key_value(char *, char **, char **);
+extern int iscsi_update_param_value(struct iscsi_param *, char *);
+extern int iscsi_decode_text_input(u8, u8, char *, u32, struct iscsi_param_list *);
+extern int iscsi_encode_text_output(u8, u8, char *, u32 *,
+                       struct iscsi_param_list *);
+extern int iscsi_check_negotiated_keys(struct iscsi_param_list *);
+extern void iscsi_set_connection_parameters(struct iscsi_conn_ops *,
+                       struct iscsi_param_list *);
+extern void iscsi_set_session_parameters(struct iscsi_sess_ops *,
+                       struct iscsi_param_list *, int);
+
+/* Generic RFC 3720 value strings. */
+#define YES                            "Yes"
+#define NO                             "No"
+#define ALL                            "All"
+#define IRRELEVANT                     "Irrelevant"
+#define NONE                           "None"
+#define NOTUNDERSTOOD                  "NotUnderstood"
+#define REJECT                         "Reject"
+
+/*
+ * The Parameter Names.
+ */
+#define AUTHMETHOD                     "AuthMethod"
+#define HEADERDIGEST                   "HeaderDigest"
+#define DATADIGEST                     "DataDigest"
+#define MAXCONNECTIONS                 "MaxConnections"
+#define SENDTARGETS                    "SendTargets"
+#define TARGETNAME                     "TargetName"
+#define INITIATORNAME                  "InitiatorName"
+#define TARGETALIAS                    "TargetAlias"
+#define INITIATORALIAS                 "InitiatorAlias"
+#define TARGETADDRESS                  "TargetAddress"
+#define TARGETPORTALGROUPTAG           "TargetPortalGroupTag"
+#define INITIALR2T                     "InitialR2T"
+#define IMMEDIATEDATA                  "ImmediateData"
+#define MAXRECVDATASEGMENTLENGTH       "MaxRecvDataSegmentLength"
+#define MAXBURSTLENGTH                 "MaxBurstLength"
+#define FIRSTBURSTLENGTH               "FirstBurstLength"
+#define DEFAULTTIME2WAIT               "DefaultTime2Wait"
+#define DEFAULTTIME2RETAIN             "DefaultTime2Retain"
+#define MAXOUTSTANDINGR2T              "MaxOutstandingR2T"
+#define DATAPDUINORDER                 "DataPDUInOrder"
+#define DATASEQUENCEINORDER            "DataSequenceInOrder"
+#define ERRORRECOVERYLEVEL             "ErrorRecoveryLevel"
+#define SESSIONTYPE                    "SessionType"
+#define IFMARKER                       "IFMarker"
+#define OFMARKER                       "OFMarker"
+#define IFMARKINT                      "IFMarkInt"
+#define OFMARKINT                      "OFMarkInt"
+/* Vendor extension keys recognized during negotiation. */
+#define X_EXTENSIONKEY                 "X-com.sbei.version"
+#define X_EXTENSIONKEY_CISCO_NEW       "X-com.cisco.protocol"
+#define X_EXTENSIONKEY_CISCO_OLD       "X-com.cisco.iscsi.draft"
+
+/*
+ * For AuthMethod.
+ */
+#define KRB5                           "KRB5"
+#define SPKM1                          "SPKM1"
+#define SPKM2                          "SPKM2"
+#define SRP                            "SRP"
+#define CHAP                           "CHAP"
+
+/*
+ * Initial values for Parameter Negotiation.
+ * NOTE: some values reference macros (e.g. NORMAL) defined further below;
+ * that is fine since macro expansion happens at the point of use.
+ */
+#define INITIAL_AUTHMETHOD                     CHAP
+#define INITIAL_HEADERDIGEST                   "CRC32C,None"
+#define INITIAL_DATADIGEST                     "CRC32C,None"
+#define INITIAL_MAXCONNECTIONS                 "1"
+#define INITIAL_SENDTARGETS                    ALL
+#define INITIAL_TARGETNAME                     "LIO.Target"
+#define INITIAL_INITIATORNAME                  "LIO.Initiator"
+#define INITIAL_TARGETALIAS                    "LIO Target"
+#define INITIAL_INITIATORALIAS                 "LIO Initiator"
+#define INITIAL_TARGETADDRESS                  "0.0.0.0:0000,0"
+#define INITIAL_TARGETPORTALGROUPTAG           "1"
+#define INITIAL_INITIALR2T                     YES
+#define INITIAL_IMMEDIATEDATA                  YES
+#define INITIAL_MAXRECVDATASEGMENTLENGTH       "8192"
+#define INITIAL_MAXBURSTLENGTH                 "262144"
+#define INITIAL_FIRSTBURSTLENGTH               "65536"
+#define INITIAL_DEFAULTTIME2WAIT               "2"
+#define INITIAL_DEFAULTTIME2RETAIN             "20"
+#define INITIAL_MAXOUTSTANDINGR2T              "1"
+#define INITIAL_DATAPDUINORDER                 YES
+#define INITIAL_DATASEQUENCEINORDER            YES
+#define INITIAL_ERRORRECOVERYLEVEL             "0"
+#define INITIAL_SESSIONTYPE                    NORMAL
+#define INITIAL_IFMARKER                       NO
+#define INITIAL_OFMARKER                       NO
+#define INITIAL_IFMARKINT                      "2048~65535"
+#define INITIAL_OFMARKINT                      "2048~65535"
+
+/*
+ * For [Header,Data]Digests.
+ */
+#define CRC32C                         "CRC32C"
+
+/*
+ * For SessionType.
+ */
+#define DISCOVERY                      "Discovery"
+#define NORMAL                         "Normal"
+
+/*
+ * struct iscsi_param->use
+ */
+#define USE_LEADING_ONLY               0x01
+#define USE_INITIAL_ONLY               0x02
+#define USE_ALL                                0x04
+
+#define IS_USE_LEADING_ONLY(p)         ((p)->use & USE_LEADING_ONLY)
+#define IS_USE_INITIAL_ONLY(p)         ((p)->use & USE_INITIAL_ONLY)
+#define IS_USE_ALL(p)                  ((p)->use & USE_ALL)
+
+#define SET_USE_INITIAL_ONLY(p)                ((p)->use |= USE_INITIAL_ONLY)
+
+/*
+ * struct iscsi_param->sender
+ */
+#define        SENDER_INITIATOR                0x01
+#define SENDER_TARGET                  0x02
+#define SENDER_BOTH                    0x03
+/* Used in iscsi_check_key() */
+#define SENDER_RECEIVER                        0x04
+
+#define IS_SENDER_INITIATOR(p)         ((p)->sender & SENDER_INITIATOR)
+#define IS_SENDER_TARGET(p)            ((p)->sender & SENDER_TARGET)
+#define IS_SENDER_BOTH(p)              ((p)->sender & SENDER_BOTH)
+
+/*
+ * struct iscsi_param->scope
+ */
+#define SCOPE_CONNECTION_ONLY          0x01
+#define SCOPE_SESSION_WIDE             0x02
+
+#define IS_SCOPE_CONNECTION_ONLY(p)    ((p)->scope & SCOPE_CONNECTION_ONLY)
+#define IS_SCOPE_SESSION_WIDE(p)       ((p)->scope & SCOPE_SESSION_WIDE)
+
+/*
+ * struct iscsi_param->phase
+ */
+#define PHASE_SECURITY                 0x01
+#define PHASE_OPERATIONAL              0x02
+#define PHASE_DECLARATIVE              0x04
+#define PHASE_FFP0                     0x08
+
+#define IS_PHASE_SECURITY(p)           ((p)->phase & PHASE_SECURITY)
+#define IS_PHASE_OPERATIONAL(p)                ((p)->phase & PHASE_OPERATIONAL)
+#define IS_PHASE_DECLARATIVE(p)                ((p)->phase & PHASE_DECLARATIVE)
+#define IS_PHASE_FFP0(p)               ((p)->phase & PHASE_FFP0)
+
+/*
+ * struct iscsi_param->type
+ */
+#define TYPE_BOOL_AND                  0x01
+#define TYPE_BOOL_OR                   0x02
+#define TYPE_NUMBER                    0x04
+#define TYPE_NUMBER_RANGE              0x08
+#define TYPE_STRING                    0x10
+#define TYPE_VALUE_LIST                        0x20
+
+#define IS_TYPE_BOOL_AND(p)            ((p)->type & TYPE_BOOL_AND)
+#define IS_TYPE_BOOL_OR(p)             ((p)->type & TYPE_BOOL_OR)
+#define IS_TYPE_NUMBER(p)              ((p)->type & TYPE_NUMBER)
+#define IS_TYPE_NUMBER_RANGE(p)                ((p)->type & TYPE_NUMBER_RANGE)
+#define IS_TYPE_STRING(p)              ((p)->type & TYPE_STRING)
+#define IS_TYPE_VALUE_LIST(p)          ((p)->type & TYPE_VALUE_LIST)
+
+/*
+ * struct iscsi_param->type_range
+ */
+#define TYPERANGE_BOOL_AND             0x0001
+#define TYPERANGE_BOOL_OR              0x0002
+#define TYPERANGE_0_TO_2               0x0004
+#define TYPERANGE_0_TO_3600            0x0008
+#define TYPERANGE_0_TO_32767           0x0010
+#define TYPERANGE_0_TO_65535           0x0020
+#define TYPERANGE_1_TO_65535           0x0040
+#define TYPERANGE_2_TO_3600            0x0080
+#define TYPERANGE_512_TO_16777215      0x0100
+#define TYPERANGE_AUTH                 0x0200
+#define TYPERANGE_DIGEST               0x0400
+#define TYPERANGE_ISCSINAME            0x0800
+#define TYPERANGE_MARKINT              0x1000
+#define TYPERANGE_SESSIONTYPE          0x2000
+#define TYPERANGE_TARGETADDRESS                0x4000
+#define TYPERANGE_UTF8                 0x8000
+
+#define IS_TYPERANGE_0_TO_2(p)         ((p)->type_range & TYPERANGE_0_TO_2)
+#define IS_TYPERANGE_0_TO_3600(p)      ((p)->type_range & TYPERANGE_0_TO_3600)
+#define IS_TYPERANGE_0_TO_32767(p)     ((p)->type_range & TYPERANGE_0_TO_32767)
+#define IS_TYPERANGE_0_TO_65535(p)     ((p)->type_range & TYPERANGE_0_TO_65535)
+#define IS_TYPERANGE_1_TO_65535(p)     ((p)->type_range & TYPERANGE_1_TO_65535)
+#define IS_TYPERANGE_2_TO_3600(p)      ((p)->type_range & TYPERANGE_2_TO_3600)
+#define IS_TYPERANGE_512_TO_16777215(p)        ((p)->type_range & \
+                                               TYPERANGE_512_TO_16777215)
+#define IS_TYPERANGE_AUTH_PARAM(p)     ((p)->type_range & TYPERANGE_AUTH)
+#define IS_TYPERANGE_DIGEST_PARAM(p)   ((p)->type_range & TYPERANGE_DIGEST)
+#define IS_TYPERANGE_SESSIONTYPE(p)    ((p)->type_range & \
+                                               TYPERANGE_SESSIONTYPE)
+
+/*
+ * struct iscsi_param->state
+ */
+#define PSTATE_ACCEPTOR                        0x01
+#define PSTATE_NEGOTIATE               0x02
+#define PSTATE_PROPOSER                        0x04
+#define PSTATE_IRRELEVANT              0x08
+#define PSTATE_REJECT                  0x10
+#define PSTATE_REPLY_OPTIONAL          0x20
+#define PSTATE_RESPONSE_GOT            0x40
+#define PSTATE_RESPONSE_SENT           0x80
+
+#define IS_PSTATE_ACCEPTOR(p)          ((p)->state & PSTATE_ACCEPTOR)
+#define IS_PSTATE_NEGOTIATE(p)         ((p)->state & PSTATE_NEGOTIATE)
+#define IS_PSTATE_PROPOSER(p)          ((p)->state & PSTATE_PROPOSER)
+#define IS_PSTATE_IRRELEVANT(p)                ((p)->state & PSTATE_IRRELEVANT)
+#define IS_PSTATE_REJECT(p)            ((p)->state & PSTATE_REJECT)
+#define IS_PSTATE_REPLY_OPTIONAL(p)    ((p)->state & PSTATE_REPLY_OPTIONAL)
+#define IS_PSTATE_RESPONSE_GOT(p)      ((p)->state & PSTATE_RESPONSE_GOT)
+#define IS_PSTATE_RESPONSE_SENT(p)     ((p)->state & PSTATE_RESPONSE_SENT)
+
+#define SET_PSTATE_ACCEPTOR(p)         ((p)->state |= PSTATE_ACCEPTOR)
+#define SET_PSTATE_NEGOTIATE(p)                ((p)->state |= PSTATE_NEGOTIATE)
+#define SET_PSTATE_PROPOSER(p)         ((p)->state |= PSTATE_PROPOSER)
+#define SET_PSTATE_IRRELEVANT(p)       ((p)->state |= PSTATE_IRRELEVANT)
+#define SET_PSTATE_REJECT(p)           ((p)->state |= PSTATE_REJECT)
+#define SET_PSTATE_REPLY_OPTIONAL(p)   ((p)->state |= PSTATE_REPLY_OPTIONAL)
+#define SET_PSTATE_RESPONSE_GOT(p)     ((p)->state |= PSTATE_RESPONSE_GOT)
+#define SET_PSTATE_RESPONSE_SENT(p)    ((p)->state |= PSTATE_RESPONSE_SENT)
+
+#endif /* ISCSI_PARAMETERS_H */
diff --git a/drivers/target/iscsi/iscsi_target_seq_pdu_list.c b/drivers/target/iscsi/iscsi_target_seq_pdu_list.c
new file mode 100644 (file)
index 0000000..fc69408
--- /dev/null
@@ -0,0 +1,664 @@
+/*******************************************************************************
+ * This file contains main functions related to iSCSI DataSequenceInOrder=No
+ * and DataPDUInOrder=No.
+ *
+ \u00a9 Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/slab.h>
+#include <linux/random.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target_seq_pdu_list.h"
+
+#define OFFLOAD_BUF_SIZE       32768
+
+/* Debug aid: log every entry of cmd->seq_list at pr_debug level. */
+void iscsit_dump_seq_list(struct iscsi_cmd *cmd)
+{
+       int i;
+       struct iscsi_seq *seq;
+
+       pr_debug("Dumping Sequence List for ITT: 0x%08x:\n",
+                       cmd->init_task_tag);
+
+       for (i = 0; i < cmd->seq_count; i++) {
+               seq = &cmd->seq_list[i];
+               pr_debug("i: %d, pdu_start: %d, pdu_count: %d,"
+                       " offset: %d, xfer_len: %d, seq_send_order: %d,"
+                       " seq_no: %d\n", i, seq->pdu_start, seq->pdu_count,
+                       seq->offset, seq->xfer_len, seq->seq_send_order,
+                       seq->seq_no);
+       }
+}
+
+/* Debug aid: log every entry of cmd->pdu_list at pr_debug level. */
+void iscsit_dump_pdu_list(struct iscsi_cmd *cmd)
+{
+       int i;
+       struct iscsi_pdu *pdu;
+
+       pr_debug("Dumping PDU List for ITT: 0x%08x:\n",
+                       cmd->init_task_tag);
+
+       for (i = 0; i < cmd->pdu_count; i++) {
+               pdu = &cmd->pdu_list[i];
+               pr_debug("i: %d, offset: %d, length: %d,"
+                       " pdu_send_order: %d, seq_no: %d\n", i, pdu->offset,
+                       pdu->length, pdu->pdu_send_order, pdu->seq_no);
+       }
+}
+
+/*
+ * Assign ascending seq_send_order values (0, 1, 2, ...) to every
+ * SEQTYPE_NORMAL sequence, i.e. an in-order (non-randomized) send order.
+ * Immediate/unsolicited sequences are skipped and keep their order of 0.
+ * NOTE(review): the 'type' parameter is never read here -- confirm whether
+ * it is kept only for signature symmetry with the randomize variants.
+ */
+static void iscsit_ordered_seq_lists(
+       struct iscsi_cmd *cmd,
+       u8 type)
+{
+       u32 i, seq_count = 0;
+
+       for (i = 0; i < cmd->seq_count; i++) {
+               if (cmd->seq_list[i].type != SEQTYPE_NORMAL)
+                       continue;
+               cmd->seq_list[i].seq_send_order = seq_count++;
+       }
+}
+
+/*
+ * Assign in-order pdu_send_order values within each sequence: the counter
+ * restarts at 0 whenever the PDU's seq_no advances.  The 'redo' loop bumps
+ * the local seq_no until it matches the current PDU's seq_no, which assumes
+ * pdu_list[] is already grouped by ascending seq_no (as built by
+ * iscsit_build_pdu_and_seq_list()).
+ * NOTE(review): the 'type' parameter is never read here -- confirm intended.
+ */
+static void iscsit_ordered_pdu_lists(
+       struct iscsi_cmd *cmd,
+       u8 type)
+{
+       u32 i, pdu_send_order = 0, seq_no = 0;
+
+       for (i = 0; i < cmd->pdu_count; i++) {
+redo:
+               if (cmd->pdu_list[i].seq_no == seq_no) {
+                       cmd->pdu_list[i].pdu_send_order = pdu_send_order++;
+                       continue;
+               }
+               seq_no++;
+               pdu_send_order = 0;
+               goto redo;
+       }
+}
+
+/*
+ *     Fill array[] with 'count' distinct pseudo-random values; the high bit
+ *     0x80000000 temporarily marks already-generated entries while checking
+ *     for duplicates, and is stripped from all entries before returning.
+ *     NOTE(review): '(1 + (9999 + 1) - j) % count' operates on a fully
+ *     random signed int, so the C truncating '%' can yield a negative
+ *     intermediate value -- verify the intended value range here.
+ */
+static void iscsit_create_random_array(u32 *array, u32 count)
+{
+       int i, j, k;
+
+       /* Single-element list needs no randomization. */
+       if (count == 1) {
+               array[0] = 0;
+               return;
+       }
+
+       for (i = 0; i < count; i++) {
+redo:
+               get_random_bytes(&j, sizeof(u32));
+               j = (1 + (int) (9999 + 1) - j) % count;
+               /* Mark the candidate and retry if it collides with any
+                * previously stored (marked) entry. */
+               for (k = 0; k < i + 1; k++) {
+                       j |= 0x80000000;
+                       if ((array[k] & 0x80000000) && (array[k] == j))
+                               goto redo;
+               }
+               array[i] = j;
+       }
+
+       /* Clear the duplicate-detection marker bit. */
+       for (i = 0; i < count; i++)
+               array[i] &= ~0x80000000;
+}
+
+/*
+ * Assign a random pdu_send_order permutation to the PDUs of each sequence.
+ * Walks pdu_list[] counting PDUs that share the current seq_no; on a
+ * sequence-number change ('redo' path) it builds a random array for that
+ * group and applies it at seq_offset.  The trailing group is handled by the
+ * 'if (seq_count)' tail after the loop.  Returns 0 on success, -1 on
+ * allocation failure.
+ * NOTE(review): 'type' is never read here -- confirm intended.
+ */
+static int iscsit_randomize_pdu_lists(
+       struct iscsi_cmd *cmd,
+       u8 type)
+{
+       int i = 0;
+       u32 *array, pdu_count, seq_count = 0, seq_no = 0, seq_offset = 0;
+
+       for (pdu_count = 0; pdu_count < cmd->pdu_count; pdu_count++) {
+redo:
+               if (cmd->pdu_list[pdu_count].seq_no == seq_no) {
+                       seq_count++;
+                       continue;
+               }
+               /* seq_no changed: randomize the group just counted. */
+               array = kzalloc(seq_count * sizeof(u32), GFP_KERNEL);
+               if (!array) {
+                       pr_err("Unable to allocate memory"
+                               " for random array.\n");
+                       return -1;
+               }
+               iscsit_create_random_array(array, seq_count);
+
+               for (i = 0; i < seq_count; i++)
+                       cmd->pdu_list[seq_offset+i].pdu_send_order = array[i];
+
+               kfree(array);
+
+               seq_offset += seq_count;
+               seq_count = 0;
+               seq_no++;
+               goto redo;
+       }
+
+       /* Randomize the final (still-pending) group, if any. */
+       if (seq_count) {
+               array = kzalloc(seq_count * sizeof(u32), GFP_KERNEL);
+               if (!array) {
+                       pr_err("Unable to allocate memory for"
+                               " random array.\n");
+                       return -1;
+               }
+               iscsit_create_random_array(array, seq_count);
+
+               for (i = 0; i < seq_count; i++)
+                       cmd->pdu_list[seq_offset+i].pdu_send_order = array[i];
+
+               kfree(array);
+       }
+
+       return 0;
+}
+
+/*
+ * Assign a random seq_send_order permutation to the SEQTYPE_NORMAL
+ * sequences.  Immediate and/or unsolicited sequences (per 'type') are
+ * excluded from the permutation size and skipped during assignment.
+ * Returns 0 on success (or nothing to do), -1 on allocation failure.
+ */
+static int iscsit_randomize_seq_lists(
+       struct iscsi_cmd *cmd,
+       u8 type)
+{
+       int i, j = 0;
+       u32 *array, seq_count = cmd->seq_count;
+
+       /* Exclude non-normal sequences from the permutation size. */
+       if ((type == PDULIST_IMMEDIATE) || (type == PDULIST_UNSOLICITED))
+               seq_count--;
+       else if (type == PDULIST_IMMEDIATE_AND_UNSOLICITED)
+               seq_count -= 2;
+
+       if (!seq_count)
+               return 0;
+
+       array = kzalloc(seq_count * sizeof(u32), GFP_KERNEL);
+       if (!array) {
+               pr_err("Unable to allocate memory for random array.\n");
+               return -1;
+       }
+       iscsit_create_random_array(array, seq_count);
+
+       for (i = 0; i < cmd->seq_count; i++) {
+               if (cmd->seq_list[i].type != SEQTYPE_NORMAL)
+                       continue;
+               cmd->seq_list[i].seq_send_order = array[j++];
+       }
+
+       kfree(array);
+       return 0;
+}
+
+/*
+ * Pre-compute how many sequences and PDUs iscsit_build_pdu_and_seq_list()
+ * will emit for cmd->data_length, so the caller can size the allocations.
+ * Mirrors the offset-walk of the builder: immediate data (if any) forms its
+ * own sequence, unsolicited data is bounded by FirstBurstLength, and all
+ * remaining data is chopped into MaxRecvDataSegmentLength PDUs grouped into
+ * MaxBurstLength sequences.  *seq_count and *pdu_count are incremented, not
+ * reset -- the caller provides the starting values.
+ */
+static void iscsit_determine_counts_for_list(
+       struct iscsi_cmd *cmd,
+       struct iscsi_build_list *bl,
+       u32 *seq_count,
+       u32 *pdu_count)
+{
+       int check_immediate = 0;
+       u32 burstlength = 0, offset = 0;
+       u32 unsolicited_data_length = 0;
+       struct iscsi_conn *conn = cmd->conn;
+
+       if ((bl->type == PDULIST_IMMEDIATE) ||
+           (bl->type == PDULIST_IMMEDIATE_AND_UNSOLICITED))
+               check_immediate = 1;
+
+       /* Unsolicited data is capped at FirstBurstLength. */
+       if ((bl->type == PDULIST_UNSOLICITED) ||
+           (bl->type == PDULIST_IMMEDIATE_AND_UNSOLICITED))
+               unsolicited_data_length = (cmd->data_length >
+                       conn->sess->sess_ops->FirstBurstLength) ?
+                       conn->sess->sess_ops->FirstBurstLength : cmd->data_length;
+
+       while (offset < cmd->data_length) {
+               *pdu_count += 1;
+
+               /* Immediate data occupies its own leading sequence. */
+               if (check_immediate) {
+                       check_immediate = 0;
+                       offset += bl->immediate_data_length;
+                       *seq_count += 1;
+                       if (unsolicited_data_length)
+                               unsolicited_data_length -=
+                                       bl->immediate_data_length;
+                       continue;
+               }
+               if (unsolicited_data_length > 0) {
+                       /* Last PDU of the command falls within unsolicited data. */
+                       if ((offset + conn->conn_ops->MaxRecvDataSegmentLength)
+                                       >= cmd->data_length) {
+                               unsolicited_data_length -=
+                                       (cmd->data_length - offset);
+                               offset += (cmd->data_length - offset);
+                               continue;
+                       }
+                       /* FirstBurstLength boundary closes the unsolicited sequence. */
+                       if ((offset + conn->conn_ops->MaxRecvDataSegmentLength)
+                                       >= conn->sess->sess_ops->FirstBurstLength) {
+                               unsolicited_data_length -=
+                                       (conn->sess->sess_ops->FirstBurstLength -
+                                       offset);
+                               offset += (conn->sess->sess_ops->FirstBurstLength -
+                                       offset);
+                               burstlength = 0;
+                               *seq_count += 1;
+                               continue;
+                       }
+
+                       offset += conn->conn_ops->MaxRecvDataSegmentLength;
+                       unsolicited_data_length -=
+                               conn->conn_ops->MaxRecvDataSegmentLength;
+                       continue;
+               }
+               /* Final PDU of the command. */
+               if ((offset + conn->conn_ops->MaxRecvDataSegmentLength) >=
+                    cmd->data_length) {
+                       offset += (cmd->data_length - offset);
+                       continue;
+               }
+               /* MaxBurstLength boundary closes the current sequence. */
+               if ((burstlength + conn->conn_ops->MaxRecvDataSegmentLength) >=
+                    conn->sess->sess_ops->MaxBurstLength) {
+                       offset += (conn->sess->sess_ops->MaxBurstLength -
+                                       burstlength);
+                       burstlength = 0;
+                       *seq_count += 1;
+                       continue;
+               }
+
+               /* Full-size PDU inside the current burst. */
+               burstlength += conn->conn_ops->MaxRecvDataSegmentLength;
+               offset += conn->conn_ops->MaxRecvDataSegmentLength;
+       }
+}
+
+
+/*
+ *     Builds PDU and/or Sequence list,  called while DataSequenceInOrder=No
+ *     and DataPDUInOrder=No.
+ *
+ *     Walks cmd->data_length exactly as iscsit_determine_counts_for_list()
+ *     does, filling cmd->pdu_list (when DataPDUInOrder=No) and/or
+ *     cmd->seq_list (when DataSequenceInOrder=No) with offset/length/type
+ *     entries, then applies ordered or randomized send orders depending on
+ *     bl->randomize and bl->data_direction.  Returns 0 on success, -1 on
+ *     randomization (allocation) failure.
+ */
+static int iscsit_build_pdu_and_seq_list(
+       struct iscsi_cmd *cmd,
+       struct iscsi_build_list *bl)
+{
+       int check_immediate = 0, datapduinorder, datasequenceinorder;
+       u32 burstlength = 0, offset = 0, i = 0;
+       u32 pdu_count = 0, seq_no = 0, unsolicited_data_length = 0;
+       struct iscsi_conn *conn = cmd->conn;
+       struct iscsi_pdu *pdu = cmd->pdu_list;
+       struct iscsi_seq *seq = cmd->seq_list;
+
+       datapduinorder = conn->sess->sess_ops->DataPDUInOrder;
+       datasequenceinorder = conn->sess->sess_ops->DataSequenceInOrder;
+
+       if ((bl->type == PDULIST_IMMEDIATE) ||
+           (bl->type == PDULIST_IMMEDIATE_AND_UNSOLICITED))
+               check_immediate = 1;
+
+       /* Unsolicited data is capped at FirstBurstLength. */
+       if ((bl->type == PDULIST_UNSOLICITED) ||
+           (bl->type == PDULIST_IMMEDIATE_AND_UNSOLICITED))
+               unsolicited_data_length = (cmd->data_length >
+                       conn->sess->sess_ops->FirstBurstLength) ?
+                       conn->sess->sess_ops->FirstBurstLength : cmd->data_length;
+
+       while (offset < cmd->data_length) {
+               pdu_count++;
+               if (!datapduinorder) {
+                       pdu[i].offset = offset;
+                       pdu[i].seq_no = seq_no;
+               }
+               /* First PDU of a sequence: record the sequence header. */
+               if (!datasequenceinorder && (pdu_count == 1)) {
+                       seq[seq_no].pdu_start = i;
+                       seq[seq_no].seq_no = seq_no;
+                       seq[seq_no].offset = offset;
+                       seq[seq_no].orig_offset = offset;
+               }
+
+               /* Immediate data: a single-PDU leading sequence. */
+               if (check_immediate) {
+                       check_immediate = 0;
+                       if (!datapduinorder) {
+                               pdu[i].type = PDUTYPE_IMMEDIATE;
+                               pdu[i++].length = bl->immediate_data_length;
+                       }
+                       if (!datasequenceinorder) {
+                               seq[seq_no].type = SEQTYPE_IMMEDIATE;
+                               seq[seq_no].pdu_count = 1;
+                               seq[seq_no].xfer_len =
+                                       bl->immediate_data_length;
+                       }
+                       offset += bl->immediate_data_length;
+                       pdu_count = 0;
+                       seq_no++;
+                       if (unsolicited_data_length)
+                               unsolicited_data_length -=
+                                       bl->immediate_data_length;
+                       continue;
+               }
+               if (unsolicited_data_length > 0) {
+                       /* Unsolicited data reaches the end of the command. */
+                       if ((offset +
+                            conn->conn_ops->MaxRecvDataSegmentLength) >=
+                            cmd->data_length) {
+                               if (!datapduinorder) {
+                                       pdu[i].type = PDUTYPE_UNSOLICITED;
+                                       pdu[i].length =
+                                               (cmd->data_length - offset);
+                               }
+                               if (!datasequenceinorder) {
+                                       seq[seq_no].type = SEQTYPE_UNSOLICITED;
+                                       seq[seq_no].pdu_count = pdu_count;
+                                       seq[seq_no].xfer_len = (burstlength +
+                                               (cmd->data_length - offset));
+                               }
+                               unsolicited_data_length -=
+                                               (cmd->data_length - offset);
+                               offset += (cmd->data_length - offset);
+                               continue;
+                       }
+                       /* FirstBurstLength boundary closes the unsolicited sequence. */
+                       if ((offset +
+                            conn->conn_ops->MaxRecvDataSegmentLength) >=
+                                       conn->sess->sess_ops->FirstBurstLength) {
+                               if (!datapduinorder) {
+                                       pdu[i].type = PDUTYPE_UNSOLICITED;
+                                       pdu[i++].length =
+                                          (conn->sess->sess_ops->FirstBurstLength -
+                                               offset);
+                               }
+                               if (!datasequenceinorder) {
+                                       seq[seq_no].type = SEQTYPE_UNSOLICITED;
+                                       seq[seq_no].pdu_count = pdu_count;
+                                       seq[seq_no].xfer_len = (burstlength +
+                                          (conn->sess->sess_ops->FirstBurstLength -
+                                               offset));
+                               }
+                               unsolicited_data_length -=
+                                       (conn->sess->sess_ops->FirstBurstLength -
+                                               offset);
+                               offset += (conn->sess->sess_ops->FirstBurstLength -
+                                               offset);
+                               burstlength = 0;
+                               pdu_count = 0;
+                               seq_no++;
+                               continue;
+                       }
+
+                       /* Full-size unsolicited PDU inside the first burst. */
+                       if (!datapduinorder) {
+                               pdu[i].type = PDUTYPE_UNSOLICITED;
+                               pdu[i++].length =
+                                    conn->conn_ops->MaxRecvDataSegmentLength;
+                       }
+                       burstlength += conn->conn_ops->MaxRecvDataSegmentLength;
+                       offset += conn->conn_ops->MaxRecvDataSegmentLength;
+                       unsolicited_data_length -=
+                               conn->conn_ops->MaxRecvDataSegmentLength;
+                       continue;
+               }
+               /* Final PDU of the command (loop exits after this pass). */
+               if ((offset + conn->conn_ops->MaxRecvDataSegmentLength) >=
+                    cmd->data_length) {
+                       if (!datapduinorder) {
+                               pdu[i].type = PDUTYPE_NORMAL;
+                               pdu[i].length = (cmd->data_length - offset);
+                       }
+                       if (!datasequenceinorder) {
+                               seq[seq_no].type = SEQTYPE_NORMAL;
+                               seq[seq_no].pdu_count = pdu_count;
+                               seq[seq_no].xfer_len = (burstlength +
+                                       (cmd->data_length - offset));
+                       }
+                       offset += (cmd->data_length - offset);
+                       continue;
+               }
+               /* MaxBurstLength boundary closes the current sequence. */
+               if ((burstlength + conn->conn_ops->MaxRecvDataSegmentLength) >=
+                    conn->sess->sess_ops->MaxBurstLength) {
+                       if (!datapduinorder) {
+                               pdu[i].type = PDUTYPE_NORMAL;
+                               pdu[i++].length =
+                                       (conn->sess->sess_ops->MaxBurstLength -
+                                               burstlength);
+                       }
+                       if (!datasequenceinorder) {
+                               seq[seq_no].type = SEQTYPE_NORMAL;
+                               seq[seq_no].pdu_count = pdu_count;
+                               seq[seq_no].xfer_len = (burstlength +
+                                       (conn->sess->sess_ops->MaxBurstLength -
+                                       burstlength));
+                       }
+                       offset += (conn->sess->sess_ops->MaxBurstLength -
+                                       burstlength);
+                       burstlength = 0;
+                       pdu_count = 0;
+                       seq_no++;
+                       continue;
+               }
+
+               /* Full-size normal PDU inside the current burst. */
+               if (!datapduinorder) {
+                       pdu[i].type = PDUTYPE_NORMAL;
+                       pdu[i++].length =
+                               conn->conn_ops->MaxRecvDataSegmentLength;
+               }
+               burstlength += conn->conn_ops->MaxRecvDataSegmentLength;
+               offset += conn->conn_ops->MaxRecvDataSegmentLength;
+       }
+
+       /* Apply sequence send order: randomized per bl->randomize, else ordered. */
+       if (!datasequenceinorder) {
+               if (bl->data_direction & ISCSI_PDU_WRITE) {
+                       if (bl->randomize & RANDOM_R2T_OFFSETS) {
+                               if (iscsit_randomize_seq_lists(cmd, bl->type)
+                                               < 0)
+                                       return -1;
+                       } else
+                               iscsit_ordered_seq_lists(cmd, bl->type);
+               } else if (bl->data_direction & ISCSI_PDU_READ) {
+                       if (bl->randomize & RANDOM_DATAIN_SEQ_OFFSETS) {
+                               if (iscsit_randomize_seq_lists(cmd, bl->type)
+                                               < 0)
+                                       return -1;
+                       } else
+                               iscsit_ordered_seq_lists(cmd, bl->type);
+               }
+#if 0
+               iscsit_dump_seq_list(cmd);
+#endif
+       }
+       /* Apply PDU send order within each sequence, same selection logic. */
+       if (!datapduinorder) {
+               if (bl->data_direction & ISCSI_PDU_WRITE) {
+                       if (bl->randomize & RANDOM_DATAOUT_PDU_OFFSETS) {
+                               if (iscsit_randomize_pdu_lists(cmd, bl->type)
+                                               < 0)
+                                       return -1;
+                       } else
+                               iscsit_ordered_pdu_lists(cmd, bl->type);
+               } else if (bl->data_direction & ISCSI_PDU_READ) {
+                       if (bl->randomize & RANDOM_DATAIN_PDU_OFFSETS) {
+                               if (iscsit_randomize_pdu_lists(cmd, bl->type)
+                                               < 0)
+                                       return -1;
+                       } else
+                               iscsit_ordered_pdu_lists(cmd, bl->type);
+               }
+#if 0
+               iscsit_dump_pdu_list(cmd);
+#endif
+       }
+
+       return 0;
+}
+
+/*
+ *     Only called while DataSequenceInOrder=No or DataPDUInOrder=No.
+ *
+ *     Allocates the out-of-order sequence and/or PDU descriptor arrays
+ *     for @cmd, then hands off to iscsit_build_pdu_and_seq_list() to
+ *     populate them.  Returns 0 on success, -1 on failure.
+ */
+int iscsit_do_build_list(
+       struct iscsi_cmd *cmd,
+       struct iscsi_build_list *bl)
+{
+       u32 pdu_count = 0, seq_count = 1;
+       struct iscsi_conn *conn = cmd->conn;
+       struct iscsi_pdu *pdu = NULL;
+       struct iscsi_seq *seq = NULL;
+
+       iscsit_determine_counts_for_list(cmd, bl, &seq_count, &pdu_count);
+
+       if (!conn->sess->sess_ops->DataSequenceInOrder) {
+               /*
+                * kcalloc() zeroes the array and checks the count * size
+                * multiplication for overflow, unlike open-coded kzalloc().
+                */
+               seq = kcalloc(seq_count, sizeof(struct iscsi_seq), GFP_ATOMIC);
+               if (!seq) {
+                       pr_err("Unable to allocate struct iscsi_seq list\n");
+                       return -1;
+               }
+               cmd->seq_list = seq;
+               cmd->seq_count = seq_count;
+       }
+
+       if (!conn->sess->sess_ops->DataPDUInOrder) {
+               pdu = kcalloc(pdu_count, sizeof(struct iscsi_pdu), GFP_ATOMIC);
+               if (!pdu) {
+                       pr_err("Unable to allocate struct iscsi_pdu list.\n");
+                       kfree(seq);
+                       /* Do not leave cmd->seq_list dangling at freed memory. */
+                       cmd->seq_list = NULL;
+                       return -1;
+               }
+               cmd->pdu_list = pdu;
+               cmd->pdu_count = pdu_count;
+       }
+
+       return iscsit_build_pdu_and_seq_list(cmd, bl);
+}
+
+/*
+ *     Locate the struct iscsi_pdu in cmd->pdu_list matching the given
+ *     (offset, length) pair, or return NULL (with an error logged) if
+ *     no entry matches or the list was never allocated.
+ */
+struct iscsi_pdu *iscsit_get_pdu_holder(
+       struct iscsi_cmd *cmd,
+       u32 offset,
+       u32 length)
+{
+       struct iscsi_pdu *pdu;
+       u32 idx;
+
+       if (!cmd->pdu_list) {
+               pr_err("struct iscsi_cmd->pdu_list is NULL!\n");
+               return NULL;
+       }
+
+       for (idx = 0; idx < cmd->pdu_count; idx++) {
+               pdu = &cmd->pdu_list[idx];
+               if ((pdu->offset == offset) && (pdu->length == length))
+                       return pdu;
+       }
+
+       pr_err("Unable to locate PDU holder for ITT: 0x%08x, Offset:"
+               " %u, Length: %u\n", cmd->init_task_tag, offset, length);
+       return NULL;
+}
+
+/*
+ *     Return the next struct iscsi_pdu to send for @cmd.
+ *
+ *     DataSequenceInOrder=Yes: walk cmd->pdu_list starting at
+ *     cmd->pdu_start, returning the entry whose pdu_send_order matches
+ *     cmd->pdu_send_order; advance to the next sequence when the scan
+ *     of the current one is exhausted.
+ *     DataSequenceInOrder=No: select from the slice of cmd->pdu_list
+ *     described by @seq (pdu_start/pdu_count).
+ */
+struct iscsi_pdu *iscsit_get_pdu_holder_for_seq(
+       struct iscsi_cmd *cmd,
+       struct iscsi_seq *seq)
+{
+       u32 i;
+       struct iscsi_conn *conn = cmd->conn;
+       struct iscsi_pdu *pdu = NULL;
+
+       if (!cmd->pdu_list) {
+               pr_err("struct iscsi_cmd->pdu_list is NULL!\n");
+               return NULL;
+       }
+
+       if (conn->sess->sess_ops->DataSequenceInOrder) {
+redo:
+               pdu = &cmd->pdu_list[cmd->pdu_start];
+
+               /*
+                * NOTE(review): the scan terminates at the first entry whose
+                * seq_no equals cmd->seq_no (the '!=' condition); it is not
+                * bounded by cmd->pdu_count, so it relies on the list built
+                * by iscsit_do_build_list() being well formed.  Confirm the
+                * intended relationship between pdu[i].seq_no and
+                * cmd->seq_no here.
+                */
+               for (i = 0; pdu[i].seq_no != cmd->seq_no; i++) {
+#if 0
+                       pr_debug("pdu[i].seq_no: %d, pdu[i].pdu"
+                               "_send_order: %d, pdu[i].offset: %d,"
+                               " pdu[i].length: %d\n", pdu[i].seq_no,
+                               pdu[i].pdu_send_order, pdu[i].offset,
+                               pdu[i].length);
+#endif
+                       if (pdu[i].pdu_send_order == cmd->pdu_send_order) {
+                               cmd->pdu_send_order++;
+                               return &pdu[i];
+                       }
+               }
+
+               /* Current sequence exhausted: move to the next one. */
+               cmd->pdu_start += cmd->pdu_send_order;
+               cmd->pdu_send_order = 0;
+               cmd->seq_no++;
+
+               if (cmd->pdu_start < cmd->pdu_count)
+                       goto redo;
+
+               pr_err("Command ITT: 0x%08x unable to locate"
+                       " struct iscsi_pdu for cmd->pdu_send_order: %u.\n",
+                       cmd->init_task_tag, cmd->pdu_send_order);
+               return NULL;
+       } else {
+               if (!seq) {
+                       pr_err("struct iscsi_seq is NULL!\n");
+                       return NULL;
+               }
+#if 0
+               pr_debug("seq->pdu_start: %d, seq->pdu_count: %d,"
+                       " seq->seq_no: %d\n", seq->pdu_start, seq->pdu_count,
+                       seq->seq_no);
+#endif
+               pdu = &cmd->pdu_list[seq->pdu_start];
+
+               if (seq->pdu_send_order == seq->pdu_count) {
+                       pr_err("Command ITT: 0x%08x seq->pdu_send"
+                               "_order: %u equals seq->pdu_count: %u\n",
+                               cmd->init_task_tag, seq->pdu_send_order,
+                               seq->pdu_count);
+                       return NULL;
+               }
+
+               for (i = 0; i < seq->pdu_count; i++) {
+                       if (pdu[i].pdu_send_order == seq->pdu_send_order) {
+                               seq->pdu_send_order++;
+                               return &pdu[i];
+                       }
+               }
+
+               pr_err("Command ITT: 0x%08x unable to locate iscsi"
+                       "_pdu_t for seq->pdu_send_order: %u.\n",
+                       cmd->init_task_tag, seq->pdu_send_order);
+               return NULL;
+       }
+
+       /* Unreachable: both branches above return. */
+       return NULL;
+}
+
+struct iscsi_seq *iscsit_get_seq_holder(
+       struct iscsi_cmd *cmd,
+       u32 offset,
+       u32 length)
+{
+       u32 i;
+
+       if (!cmd->seq_list) {
+               pr_err("struct iscsi_cmd->seq_list is NULL!\n");
+               return NULL;
+       }
+
+       for (i = 0; i < cmd->seq_count; i++) {
+#if 0
+               pr_debug("seq_list[i].orig_offset: %d, seq_list[i]."
+                       "xfer_len: %d, seq_list[i].seq_no %u\n",
+                       cmd->seq_list[i].orig_offset, cmd->seq_list[i].xfer_len,
+                       cmd->seq_list[i].seq_no);
+#endif
+               if ((cmd->seq_list[i].orig_offset +
+                               cmd->seq_list[i].xfer_len) >=
+                               (offset + length))
+                       return &cmd->seq_list[i];
+       }
+
+       pr_err("Unable to locate Sequence holder for ITT: 0x%08x,"
+               " Offset: %u, Length: %u\n", cmd->init_task_tag, offset,
+               length);
+       return NULL;
+}
diff --git a/drivers/target/iscsi/iscsi_target_seq_pdu_list.h b/drivers/target/iscsi/iscsi_target_seq_pdu_list.h
new file mode 100644 (file)
index 0000000..0d52a10
--- /dev/null
@@ -0,0 +1,86 @@
+#ifndef ISCSI_SEQ_AND_PDU_LIST_H
+#define ISCSI_SEQ_AND_PDU_LIST_H
+
+/*
+ * struct iscsi_pdu->status (DataOUT send state).
+ * NOTE(review): a second set of ->status values (ISCSI_PDU_*) is defined
+ * below under the same field name; confirm which users rely on which set.
+ */
+#define DATAOUT_PDU_SENT                       1
+
+/* struct iscsi_seq->type */
+#define SEQTYPE_IMMEDIATE                      1
+#define SEQTYPE_UNSOLICITED                    2
+#define SEQTYPE_NORMAL                         3
+
+/* struct iscsi_seq->status */
+#define DATAOUT_SEQUENCE_GOT_R2T               1
+#define DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY 2
+#define DATAOUT_SEQUENCE_COMPLETE              3
+
+/* iscsit_determine_counts_for_list() type */
+#define PDULIST_NORMAL                         1
+#define PDULIST_IMMEDIATE                      2
+#define PDULIST_UNSOLICITED                    3
+#define PDULIST_IMMEDIATE_AND_UNSOLICITED      4
+
+/* struct iscsi_pdu->type */
+#define PDUTYPE_IMMEDIATE                      1
+#define PDUTYPE_UNSOLICITED                    2
+#define PDUTYPE_NORMAL                         3
+
+/* struct iscsi_pdu->status (receive-side state) */
+#define ISCSI_PDU_NOT_RECEIVED                 0
+#define ISCSI_PDU_RECEIVED_OK                  1
+#define ISCSI_PDU_CRC_FAILED                   2
+#define ISCSI_PDU_TIMED_OUT                    3
+
+/* struct iscsi_build_list->randomize (bit flags, may be OR'ed) */
+#define RANDOM_DATAIN_PDU_OFFSETS              0x01
+#define RANDOM_DATAIN_SEQ_OFFSETS              0x02
+#define RANDOM_DATAOUT_PDU_OFFSETS             0x04
+#define RANDOM_R2T_OFFSETS                     0x08
+
+/* struct iscsi_build_list->data_direction (bit flags) */
+#define ISCSI_PDU_READ                         0x01
+#define ISCSI_PDU_WRITE                                0x02
+
+/* Parameters controlling how iscsit_do_build_list() builds the lists. */
+struct iscsi_build_list {
+       int             data_direction;
+       int             randomize;
+       int             type;
+       int             immediate_data_length;
+};
+
+/* Per-PDU bookkeeping for DataPDUInOrder=No transfers. */
+struct iscsi_pdu {
+       int             status;
+       int             type;
+       u8              flags;
+       u32             data_sn;
+       u32             length;
+       u32             offset;
+       u32             pdu_send_order;
+       u32             seq_no;
+} ____cacheline_aligned;
+
+/* Per-sequence bookkeeping for DataSequenceInOrder=No transfers. */
+struct iscsi_seq {
+       int             sent;
+       int             status;
+       int             type;
+       u32             data_sn;
+       u32             first_datasn;
+       u32             last_datasn;
+       u32             next_burst_len;
+       u32             pdu_start;
+       u32             pdu_count;
+       u32             offset;
+       u32             orig_offset;
+       u32             pdu_send_order;
+       u32             r2t_sn;
+       u32             seq_send_order;
+       u32             seq_no;
+       u32             xfer_len;
+} ____cacheline_aligned;
+
+extern int iscsit_do_build_list(struct iscsi_cmd *, struct iscsi_build_list *);
+extern struct iscsi_pdu *iscsit_get_pdu_holder(struct iscsi_cmd *, u32, u32);
+extern struct iscsi_pdu *iscsit_get_pdu_holder_for_seq(struct iscsi_cmd *, struct iscsi_seq *);
+extern struct iscsi_seq *iscsit_get_seq_holder(struct iscsi_cmd *, u32, u32);
+
+#endif /* ISCSI_SEQ_AND_PDU_LIST_H */
diff --git a/drivers/target/iscsi/iscsi_target_stat.c b/drivers/target/iscsi/iscsi_target_stat.c
new file mode 100644 (file)
index 0000000..bbdbe93
--- /dev/null
@@ -0,0 +1,950 @@
+/*******************************************************************************
+ * Modern ConfigFS group context specific iSCSI statistics based on original
+ * iscsi_target_mib.c code
+ *
+ * Copyright (c) 2011 Rising Tide Systems
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/configfs.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+#include <target/configfs_macros.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_parameters.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target_stat.h"
+
+#ifndef INITIAL_JIFFIES
+#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ))
+#endif
+
+/* Instance Attributes Table */
+#define ISCSI_INST_NUM_NODES           1
+#define ISCSI_INST_DESCR               "Storage Engine Target"
+#define ISCSI_INST_LAST_FAILURE_TYPE   0
+#define ISCSI_DISCONTINUITY_TIME       0
+
+#define ISCSI_NODE_INDEX               1
+
+/*
+ * Parenthesize the macro argument so expressions such as ISPRINT(c + 1)
+ * expand correctly (the original left 'a' unparenthesized).
+ */
+#define ISPRINT(a)   (((a) >= ' ') && ((a) <= '~'))
+
+/****************************************************************************
+ * iSCSI MIB Tables
+ ****************************************************************************/
+/*
+ * Instance Attributes Table
+ */
+CONFIGFS_EATTR_STRUCT(iscsi_stat_instance, iscsi_wwn_stat_grps);
+/* Read-write configfs attribute: expands to show + store helper hookup. */
+#define ISCSI_STAT_INSTANCE_ATTR(_name, _mode)                 \
+static struct iscsi_stat_instance_attribute                    \
+                       iscsi_stat_instance_##_name =           \
+       __CONFIGFS_EATTR(_name, _mode,                          \
+       iscsi_stat_instance_show_attr_##_name,                  \
+       iscsi_stat_instance_store_attr_##_name);
+
+/* Read-only variant: show helper only. */
+#define ISCSI_STAT_INSTANCE_ATTR_RO(_name)                     \
+static struct iscsi_stat_instance_attribute                    \
+                       iscsi_stat_instance_##_name =           \
+       __CONFIGFS_EATTR_RO(_name,                              \
+       iscsi_stat_instance_show_attr_##_name);
+
+/*
+ * Per-tiqn "instance" MIB attributes.  Each show helper recovers the
+ * owning struct iscsi_tiqn from the embedded stat group via container_of()
+ * and formats a single value into *page.
+ */
+static ssize_t iscsi_stat_instance_show_attr_inst(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                               struct iscsi_tiqn, tiqn_stat_grps);
+
+       return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(inst);
+
+static ssize_t iscsi_stat_instance_show_attr_min_ver(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_DRAFT20_VERSION);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(min_ver);
+
+static ssize_t iscsi_stat_instance_show_attr_max_ver(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_DRAFT20_VERSION);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(max_ver);
+
+static ssize_t iscsi_stat_instance_show_attr_portals(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                               struct iscsi_tiqn, tiqn_stat_grps);
+
+       return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_num_tpg_nps);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(portals);
+
+static ssize_t iscsi_stat_instance_show_attr_nodes(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_INST_NUM_NODES);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(nodes);
+
+static ssize_t iscsi_stat_instance_show_attr_sessions(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                               struct iscsi_tiqn, tiqn_stat_grps);
+
+       return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_nsessions);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(sessions);
+
+/* Sum the three session error counters atomically under sess_err->lock. */
+static ssize_t iscsi_stat_instance_show_attr_fail_sess(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                               struct iscsi_tiqn, tiqn_stat_grps);
+       struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats;
+       u32 sess_err_count;
+
+       spin_lock_bh(&sess_err->lock);
+       sess_err_count = (sess_err->digest_errors +
+                         sess_err->cxn_timeout_errors +
+                         sess_err->pdu_format_errors);
+       spin_unlock_bh(&sess_err->lock);
+
+       return snprintf(page, PAGE_SIZE, "%u\n", sess_err_count);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(fail_sess);
+
+static ssize_t iscsi_stat_instance_show_attr_fail_type(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                               struct iscsi_tiqn, tiqn_stat_grps);
+       struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats;
+
+       return snprintf(page, PAGE_SIZE, "%u\n",
+                       sess_err->last_sess_failure_type);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(fail_type);
+
+/* Falls back to NONE when no failed remote name has been recorded yet. */
+static ssize_t iscsi_stat_instance_show_attr_fail_rem_name(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                               struct iscsi_tiqn, tiqn_stat_grps);
+       struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats;
+
+       return snprintf(page, PAGE_SIZE, "%s\n",
+                       sess_err->last_sess_fail_rem_name[0] ?
+                       sess_err->last_sess_fail_rem_name : NONE);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(fail_rem_name);
+
+/* Discontinuity time is not tracked; always reports 0. */
+static ssize_t iscsi_stat_instance_show_attr_disc_time(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_DISCONTINUITY_TIME);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(disc_time);
+
+static ssize_t iscsi_stat_instance_show_attr_description(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       return snprintf(page, PAGE_SIZE, "%s\n", ISCSI_INST_DESCR);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(description);
+
+static ssize_t iscsi_stat_instance_show_attr_vendor(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       return snprintf(page, PAGE_SIZE, "RisingTide Systems iSCSI-Target\n");
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(vendor);
+
+static ssize_t iscsi_stat_instance_show_attr_version(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       return snprintf(page, PAGE_SIZE, "%s\n", ISCSIT_VERSION);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(version);
+
+CONFIGFS_EATTR_OPS(iscsi_stat_instance, iscsi_wwn_stat_grps,
+               iscsi_instance_group);
+
+/* configfs attribute table for the per-tiqn "instance" stat group. */
+static struct configfs_attribute *iscsi_stat_instance_attrs[] = {
+       &iscsi_stat_instance_inst.attr,
+       &iscsi_stat_instance_min_ver.attr,
+       &iscsi_stat_instance_max_ver.attr,
+       &iscsi_stat_instance_portals.attr,
+       &iscsi_stat_instance_nodes.attr,
+       &iscsi_stat_instance_sessions.attr,
+       &iscsi_stat_instance_fail_sess.attr,
+       &iscsi_stat_instance_fail_type.attr,
+       &iscsi_stat_instance_fail_rem_name.attr,
+       &iscsi_stat_instance_disc_time.attr,
+       &iscsi_stat_instance_description.attr,
+       &iscsi_stat_instance_vendor.attr,
+       &iscsi_stat_instance_version.attr,
+       NULL,
+};
+
+static struct configfs_item_operations iscsi_stat_instance_item_ops = {
+       .show_attribute         = iscsi_stat_instance_attr_show,
+       .store_attribute        = iscsi_stat_instance_attr_store,
+};
+
+struct config_item_type iscsi_stat_instance_cit = {
+       .ct_item_ops            = &iscsi_stat_instance_item_ops,
+       .ct_attrs               = iscsi_stat_instance_attrs,
+       .ct_owner               = THIS_MODULE,
+};
+
+/*
+ * Instance Session Failure Stats Table
+ */
+CONFIGFS_EATTR_STRUCT(iscsi_stat_sess_err, iscsi_wwn_stat_grps);
+/* Read-write configfs attribute: expands to show + store helper hookup. */
+#define ISCSI_STAT_SESS_ERR_ATTR(_name, _mode)                 \
+static struct iscsi_stat_sess_err_attribute                    \
+                       iscsi_stat_sess_err_##_name =           \
+       __CONFIGFS_EATTR(_name, _mode,                          \
+       iscsi_stat_sess_err_show_attr_##_name,                  \
+       iscsi_stat_sess_err_store_attr_##_name);
+
+/* Read-only variant: show helper only. */
+#define ISCSI_STAT_SESS_ERR_ATTR_RO(_name)                     \
+static struct iscsi_stat_sess_err_attribute                    \
+                       iscsi_stat_sess_err_##_name =           \
+       __CONFIGFS_EATTR_RO(_name,                              \
+       iscsi_stat_sess_err_show_attr_##_name);
+
+static ssize_t iscsi_stat_sess_err_show_attr_inst(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                               struct iscsi_tiqn, tiqn_stat_grps);
+
+       return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index);
+}
+ISCSI_STAT_SESS_ERR_ATTR_RO(inst);
+
+/*
+ * NOTE(review): the three counters below are read without taking
+ * sess_err->lock, unlike iscsi_stat_instance_show_attr_fail_sess();
+ * confirm that unsynchronized reads of the individual u32 counters
+ * are acceptable here.
+ */
+static ssize_t iscsi_stat_sess_err_show_attr_digest_errors(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                               struct iscsi_tiqn, tiqn_stat_grps);
+       struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats;
+
+       return snprintf(page, PAGE_SIZE, "%u\n", sess_err->digest_errors);
+}
+ISCSI_STAT_SESS_ERR_ATTR_RO(digest_errors);
+
+static ssize_t iscsi_stat_sess_err_show_attr_cxn_errors(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                               struct iscsi_tiqn, tiqn_stat_grps);
+       struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats;
+
+       return snprintf(page, PAGE_SIZE, "%u\n", sess_err->cxn_timeout_errors);
+}
+ISCSI_STAT_SESS_ERR_ATTR_RO(cxn_errors);
+
+static ssize_t iscsi_stat_sess_err_show_attr_format_errors(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                               struct iscsi_tiqn, tiqn_stat_grps);
+       struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats;
+
+       return snprintf(page, PAGE_SIZE, "%u\n", sess_err->pdu_format_errors);
+}
+ISCSI_STAT_SESS_ERR_ATTR_RO(format_errors);
+
+CONFIGFS_EATTR_OPS(iscsi_stat_sess_err, iscsi_wwn_stat_grps,
+               iscsi_sess_err_group);
+
+/* configfs attribute table for the session-error stat group. */
+static struct configfs_attribute *iscsi_stat_sess_err_attrs[] = {
+       &iscsi_stat_sess_err_inst.attr,
+       &iscsi_stat_sess_err_digest_errors.attr,
+       &iscsi_stat_sess_err_cxn_errors.attr,
+       &iscsi_stat_sess_err_format_errors.attr,
+       NULL,
+};
+
+static struct configfs_item_operations iscsi_stat_sess_err_item_ops = {
+       .show_attribute         = iscsi_stat_sess_err_attr_show,
+       .store_attribute        = iscsi_stat_sess_err_attr_store,
+};
+
+struct config_item_type iscsi_stat_sess_err_cit = {
+       .ct_item_ops            = &iscsi_stat_sess_err_item_ops,
+       .ct_attrs               = iscsi_stat_sess_err_attrs,
+       .ct_owner               = THIS_MODULE,
+};
+
+/*
+ * Target Attributes Table
+ */
+CONFIGFS_EATTR_STRUCT(iscsi_stat_tgt_attr, iscsi_wwn_stat_grps);
+#define ISCSI_STAT_TGT_ATTR(_name, _mode)                      \
+static struct iscsi_stat_tgt_attr_attribute                    \
+                       iscsi_stat_tgt_attr_##_name =           \
+       __CONFIGFS_EATTR(_name, _mode,                          \
+       iscsi_stat_tgt-attr_show_attr_##_name,                  \
+       iscsi_stat_tgt_attr_store_attr_##_name);
+
+#define ISCSI_STAT_TGT_ATTR_RO(_name)                          \
+static struct iscsi_stat_tgt_attr_attribute                    \
+                       iscsi_stat_tgt_attr_##_name =           \
+       __CONFIGFS_EATTR_RO(_name,                              \
+       iscsi_stat_tgt_attr_show_attr_##_name);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_inst(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                               struct iscsi_tiqn, tiqn_stat_grps);
+
+       return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index);
+}
+ISCSI_STAT_TGT_ATTR_RO(inst);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_indx(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_NODE_INDEX);
+}
+ISCSI_STAT_TGT_ATTR_RO(indx);
+
+/* Sum all login failure counters atomically under lstat->lock. */
+static ssize_t iscsi_stat_tgt_attr_show_attr_login_fails(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                               struct iscsi_tiqn, tiqn_stat_grps);
+       struct iscsi_login_stats *lstat = &tiqn->login_stats;
+       u32 fail_count;
+
+       spin_lock(&lstat->lock);
+       fail_count = (lstat->redirects + lstat->authorize_fails +
+                       lstat->authenticate_fails + lstat->negotiate_fails +
+                       lstat->other_fails);
+       spin_unlock(&lstat->lock);
+
+       return snprintf(page, PAGE_SIZE, "%u\n", fail_count);
+}
+ISCSI_STAT_TGT_ATTR_RO(login_fails);
+
+/* Reports centiseconds since boot (jiffies rebased off INITIAL_JIFFIES). */
+static ssize_t iscsi_stat_tgt_attr_show_attr_last_fail_time(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                               struct iscsi_tiqn, tiqn_stat_grps);
+       struct iscsi_login_stats *lstat = &tiqn->login_stats;
+       u32 last_fail_time;
+
+       spin_lock(&lstat->lock);
+       last_fail_time = lstat->last_fail_time ?
+                       (u32)(((u32)lstat->last_fail_time -
+                               INITIAL_JIFFIES) * 100 / HZ) : 0;
+       spin_unlock(&lstat->lock);
+
+       return snprintf(page, PAGE_SIZE, "%u\n", last_fail_time);
+}
+ISCSI_STAT_TGT_ATTR_RO(last_fail_time);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_last_fail_type(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                               struct iscsi_tiqn, tiqn_stat_grps);
+       struct iscsi_login_stats *lstat = &tiqn->login_stats;
+       u32 last_fail_type;
+
+       spin_lock(&lstat->lock);
+       last_fail_type = lstat->last_fail_type;
+       spin_unlock(&lstat->lock);
+
+       return snprintf(page, PAGE_SIZE, "%u\n", last_fail_type);
+}
+ISCSI_STAT_TGT_ATTR_RO(last_fail_type);
+
+/*
+ * Snapshot the last failed initiator name under lstat->lock, then format
+ * it into *page after dropping the lock.  Reports NONE when no failure
+ * has been recorded yet.
+ */
+static ssize_t iscsi_stat_tgt_attr_show_attr_fail_intr_name(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                               struct iscsi_tiqn, tiqn_stat_grps);
+       struct iscsi_login_stats *lstat = &tiqn->login_stats;
+       unsigned char buf[224];
+
+       spin_lock(&lstat->lock);
+       /* sizeof(buf) keeps the bound in sync with the array declaration
+        * (original repeated the magic constant 224). */
+       snprintf(buf, sizeof(buf), "%s", lstat->last_intr_fail_name[0] ?
+                               lstat->last_intr_fail_name : NONE);
+       spin_unlock(&lstat->lock);
+
+       return snprintf(page, PAGE_SIZE, "%s\n", buf);
+}
+ISCSI_STAT_TGT_ATTR_RO(fail_intr_name);
+
+/*
+ * NOTE(review): the '!= NULL' test below is suspicious — if
+ * last_intr_fail_ip_addr is a char array (as the %s uses suggest), the
+ * comparison is always true and this would always report "ipv6";
+ * fail_intr_addr below instead keys off last_intr_fail_ip_family.
+ * Confirm the field's type and the intended check.
+ */
+static ssize_t iscsi_stat_tgt_attr_show_attr_fail_intr_addr_type(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                       struct iscsi_tiqn, tiqn_stat_grps);
+       struct iscsi_login_stats *lstat = &tiqn->login_stats;
+       unsigned char buf[8];
+
+       spin_lock(&lstat->lock);
+       snprintf(buf, 8, "%s", (lstat->last_intr_fail_ip_addr != NULL) ?
+                               "ipv6" : "ipv4");
+       spin_unlock(&lstat->lock);
+
+       return snprintf(page, PAGE_SIZE, "%s\n", buf);
+}
+ISCSI_STAT_TGT_ATTR_RO(fail_intr_addr_type);
+
+/* IPv6 addresses are bracketed, matching iSCSI portal address notation. */
+static ssize_t iscsi_stat_tgt_attr_show_attr_fail_intr_addr(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                       struct iscsi_tiqn, tiqn_stat_grps);
+       struct iscsi_login_stats *lstat = &tiqn->login_stats;
+       unsigned char buf[32];
+
+       spin_lock(&lstat->lock);
+       if (lstat->last_intr_fail_ip_family == AF_INET6)
+               snprintf(buf, 32, "[%s]", lstat->last_intr_fail_ip_addr);
+       else
+               snprintf(buf, 32, "%s", lstat->last_intr_fail_ip_addr);
+       spin_unlock(&lstat->lock);
+
+       return snprintf(page, PAGE_SIZE, "%s\n", buf);
+}
+ISCSI_STAT_TGT_ATTR_RO(fail_intr_addr);
+
+CONFIGFS_EATTR_OPS(iscsi_stat_tgt_attr, iscsi_wwn_stat_grps,
+               iscsi_tgt_attr_group);
+
+/* configfs attribute table for the target-attributes stat group. */
+static struct configfs_attribute *iscsi_stat_tgt_attr_attrs[] = {
+       &iscsi_stat_tgt_attr_inst.attr,
+       &iscsi_stat_tgt_attr_indx.attr,
+       &iscsi_stat_tgt_attr_login_fails.attr,
+       &iscsi_stat_tgt_attr_last_fail_time.attr,
+       &iscsi_stat_tgt_attr_last_fail_type.attr,
+       &iscsi_stat_tgt_attr_fail_intr_name.attr,
+       &iscsi_stat_tgt_attr_fail_intr_addr_type.attr,
+       &iscsi_stat_tgt_attr_fail_intr_addr.attr,
+       NULL,
+};
+
+static struct configfs_item_operations iscsi_stat_tgt_attr_item_ops = {
+       .show_attribute         = iscsi_stat_tgt_attr_attr_show,
+       .store_attribute        = iscsi_stat_tgt_attr_attr_store,
+};
+
+struct config_item_type iscsi_stat_tgt_attr_cit = {
+       .ct_item_ops            = &iscsi_stat_tgt_attr_item_ops,
+       .ct_attrs               = iscsi_stat_tgt_attr_attrs,
+       .ct_owner               = THIS_MODULE,
+};
+
+/*
+ * Target Login Stats Table
+ */
+CONFIGFS_EATTR_STRUCT(iscsi_stat_login, iscsi_wwn_stat_grps);
+/* Read-write configfs attribute: expands to show + store helper hookup. */
+#define ISCSI_STAT_LOGIN(_name, _mode)                         \
+static struct iscsi_stat_login_attribute                       \
+                       iscsi_stat_login_##_name =              \
+       __CONFIGFS_EATTR(_name, _mode,                          \
+       iscsi_stat_login_show_attr_##_name,                     \
+       iscsi_stat_login_store_attr_##_name);
+
+/* Read-only variant: show helper only. */
+#define ISCSI_STAT_LOGIN_RO(_name)                             \
+static struct iscsi_stat_login_attribute                       \
+                       iscsi_stat_login_##_name =              \
+       __CONFIGFS_EATTR_RO(_name,                              \
+       iscsi_stat_login_show_attr_##_name);
+
+/*
+ * Login statistics attributes.  Each per-counter helper samples its
+ * counter and formats it while holding lstat->lock so the reported
+ * value is internally consistent.
+ */
+static ssize_t iscsi_stat_login_show_attr_inst(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                               struct iscsi_tiqn, tiqn_stat_grps);
+
+       return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index);
+}
+ISCSI_STAT_LOGIN_RO(inst);
+
+static ssize_t iscsi_stat_login_show_attr_indx(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_NODE_INDEX);
+}
+ISCSI_STAT_LOGIN_RO(indx);
+
+static ssize_t iscsi_stat_login_show_attr_accepts(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                               struct iscsi_tiqn, tiqn_stat_grps);
+       struct iscsi_login_stats *lstat = &tiqn->login_stats;
+       ssize_t ret;
+
+       spin_lock(&lstat->lock);
+       ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->accepts);
+       spin_unlock(&lstat->lock);
+
+       return ret;
+}
+ISCSI_STAT_LOGIN_RO(accepts);
+
+static ssize_t iscsi_stat_login_show_attr_other_fails(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                               struct iscsi_tiqn, tiqn_stat_grps);
+       struct iscsi_login_stats *lstat = &tiqn->login_stats;
+       ssize_t ret;
+
+       spin_lock(&lstat->lock);
+       ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->other_fails);
+       spin_unlock(&lstat->lock);
+
+       return ret;
+}
+ISCSI_STAT_LOGIN_RO(other_fails);
+
+static ssize_t iscsi_stat_login_show_attr_redirects(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                               struct iscsi_tiqn, tiqn_stat_grps);
+       struct iscsi_login_stats *lstat = &tiqn->login_stats;
+       ssize_t ret;
+
+       spin_lock(&lstat->lock);
+       ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->redirects);
+       spin_unlock(&lstat->lock);
+
+       return ret;
+}
+ISCSI_STAT_LOGIN_RO(redirects);
+
+static ssize_t iscsi_stat_login_show_attr_authorize_fails(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                               struct iscsi_tiqn, tiqn_stat_grps);
+       struct iscsi_login_stats *lstat = &tiqn->login_stats;
+       ssize_t ret;
+
+       spin_lock(&lstat->lock);
+       ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->authorize_fails);
+       spin_unlock(&lstat->lock);
+
+       return ret;
+}
+ISCSI_STAT_LOGIN_RO(authorize_fails);
+
+static ssize_t iscsi_stat_login_show_attr_authenticate_fails(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                               struct iscsi_tiqn, tiqn_stat_grps);
+       struct iscsi_login_stats *lstat = &tiqn->login_stats;
+       ssize_t ret;
+
+       spin_lock(&lstat->lock);
+       ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->authenticate_fails);
+       spin_unlock(&lstat->lock);
+
+       return ret;
+}
+ISCSI_STAT_LOGIN_RO(authenticate_fails);
+
+static ssize_t iscsi_stat_login_show_attr_negotiate_fails(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                               struct iscsi_tiqn, tiqn_stat_grps);
+       struct iscsi_login_stats *lstat = &tiqn->login_stats;
+       ssize_t ret;
+
+       spin_lock(&lstat->lock);
+       ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->negotiate_fails);
+       spin_unlock(&lstat->lock);
+
+       return ret;
+}
+ISCSI_STAT_LOGIN_RO(negotiate_fails);
+
+/* Generate iscsi_stat_login_attr_show()/iscsi_stat_login_attr_store(). */
+CONFIGFS_EATTR_OPS(iscsi_stat_login, iscsi_wwn_stat_grps,
+               iscsi_login_stats_group);
+
+/* NULL-terminated attribute list for the login stats configfs group. */
+static struct configfs_attribute *iscsi_stat_login_stats_attrs[] = {
+       &iscsi_stat_login_inst.attr,
+       &iscsi_stat_login_indx.attr,
+       &iscsi_stat_login_accepts.attr,
+       &iscsi_stat_login_other_fails.attr,
+       &iscsi_stat_login_redirects.attr,
+       &iscsi_stat_login_authorize_fails.attr,
+       &iscsi_stat_login_authenticate_fails.attr,
+       &iscsi_stat_login_negotiate_fails.attr,
+       NULL,
+};
+
+static struct configfs_item_operations iscsi_stat_login_stats_item_ops = {
+       .show_attribute         = iscsi_stat_login_attr_show,
+       .store_attribute        = iscsi_stat_login_attr_store,
+};
+
+/* Item type registered for the tiqn login stats group (see iscsi_target_stat.h). */
+struct config_item_type iscsi_stat_login_cit = {
+       .ct_item_ops            = &iscsi_stat_login_stats_item_ops,
+       .ct_attrs               = iscsi_stat_login_stats_attrs,
+       .ct_owner               = THIS_MODULE,
+};
+
+/*
+ * Target Logout Stats Table
+ */
+
+/* Declare struct iscsi_stat_logout_attribute for the logout stats table. */
+CONFIGFS_EATTR_STRUCT(iscsi_stat_logout, iscsi_wwn_stat_grps);
+/* Define a read/write logout stats attribute bound to show/store handlers. */
+#define ISCSI_STAT_LOGOUT(_name, _mode)                                \
+static struct iscsi_stat_logout_attribute                      \
+                       iscsi_stat_logout_##_name =             \
+       __CONFIGFS_EATTR(_name, _mode,                          \
+       iscsi_stat_logout_show_attr_##_name,                    \
+       iscsi_stat_logout_store_attr_##_name);
+
+/* Define a read-only logout stats attribute bound to its show handler. */
+#define ISCSI_STAT_LOGOUT_RO(_name)                            \
+static struct iscsi_stat_logout_attribute                      \
+                       iscsi_stat_logout_##_name =             \
+       __CONFIGFS_EATTR_RO(_name,                              \
+       iscsi_stat_logout_show_attr_##_name);
+
+/* configfs show handler: print the owning tiqn's instance index. */
+static ssize_t iscsi_stat_logout_show_attr_inst(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                       struct iscsi_tiqn, tiqn_stat_grps);
+
+       return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index);
+}
+ISCSI_STAT_LOGOUT_RO(inst);
+
+/* configfs show handler: print the fixed ISCSI_NODE_INDEX constant. */
+static ssize_t iscsi_stat_logout_show_attr_indx(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_NODE_INDEX);
+}
+ISCSI_STAT_LOGOUT_RO(indx);
+
+/*
+ * configfs show handler: print the 'normal_logouts' counter.
+ * NOTE(review): read without taking logout_stats.lock — a torn read of a
+ * u32 is not possible on supported architectures, presumably intentional.
+ */
+static ssize_t iscsi_stat_logout_show_attr_normal_logouts(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                       struct iscsi_tiqn, tiqn_stat_grps);
+       struct iscsi_logout_stats *lstats = &tiqn->logout_stats;
+
+       return snprintf(page, PAGE_SIZE, "%u\n", lstats->normal_logouts);
+}
+ISCSI_STAT_LOGOUT_RO(normal_logouts);
+
+/* configfs show handler: print the 'abnormal_logouts' counter (unlocked read). */
+static ssize_t iscsi_stat_logout_show_attr_abnormal_logouts(
+       struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+       struct iscsi_tiqn *tiqn = container_of(igrps,
+                       struct iscsi_tiqn, tiqn_stat_grps);
+       struct iscsi_logout_stats *lstats = &tiqn->logout_stats;
+
+       return snprintf(page, PAGE_SIZE, "%u\n", lstats->abnormal_logouts);
+}
+ISCSI_STAT_LOGOUT_RO(abnormal_logouts);
+
+/* Generate iscsi_stat_logout_attr_show()/iscsi_stat_logout_attr_store(). */
+CONFIGFS_EATTR_OPS(iscsi_stat_logout, iscsi_wwn_stat_grps,
+               iscsi_logout_stats_group);
+
+/* NULL-terminated attribute list for the logout stats configfs group. */
+static struct configfs_attribute *iscsi_stat_logout_stats_attrs[] = {
+       &iscsi_stat_logout_inst.attr,
+       &iscsi_stat_logout_indx.attr,
+       &iscsi_stat_logout_normal_logouts.attr,
+       &iscsi_stat_logout_abnormal_logouts.attr,
+       NULL,
+};
+
+static struct configfs_item_operations iscsi_stat_logout_stats_item_ops = {
+       .show_attribute         = iscsi_stat_logout_attr_show,
+       .store_attribute        = iscsi_stat_logout_attr_store,
+};
+
+/* Item type registered for the tiqn logout stats group (see iscsi_target_stat.h). */
+struct config_item_type iscsi_stat_logout_cit = {
+       .ct_item_ops            = &iscsi_stat_logout_stats_item_ops,
+       .ct_attrs               = iscsi_stat_logout_stats_attrs,
+       .ct_owner               = THIS_MODULE,
+};
+
+/*
+ * Session Stats Table
+ */
+
+/* Declare struct iscsi_stat_sess_attribute for the per-node session stats table. */
+CONFIGFS_EATTR_STRUCT(iscsi_stat_sess, iscsi_node_stat_grps);
+/* Define a read/write session stats attribute bound to show/store handlers. */
+#define ISCSI_STAT_SESS(_name, _mode)                          \
+static struct iscsi_stat_sess_attribute                                \
+                       iscsi_stat_sess_##_name =               \
+       __CONFIGFS_EATTR(_name, _mode,                          \
+       iscsi_stat_sess_show_attr_##_name,                      \
+       iscsi_stat_sess_store_attr_##_name);
+
+/* Define a read-only session stats attribute bound to its show handler. */
+#define ISCSI_STAT_SESS_RO(_name)                              \
+static struct iscsi_stat_sess_attribute                                \
+                       iscsi_stat_sess_##_name =               \
+       __CONFIGFS_EATTR_RO(_name,                              \
+       iscsi_stat_sess_show_attr_##_name);
+
+/*
+ * configfs show handler: walk from the node ACL's stat groups up through the
+ * owning portal group's WWN to the tiqn, and print its instance index.
+ */
+static ssize_t iscsi_stat_sess_show_attr_inst(
+       struct iscsi_node_stat_grps *igrps, char *page)
+{
+       struct iscsi_node_acl *acl = container_of(igrps,
+                       struct iscsi_node_acl, node_stat_grps);
+       struct se_wwn *wwn = acl->se_node_acl.se_tpg->se_tpg_wwn;
+       struct iscsi_tiqn *tiqn = container_of(wwn,
+                       struct iscsi_tiqn, tiqn_wwn);
+
+       return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index);
+}
+ISCSI_STAT_SESS_RO(inst);
+
+/*
+ * configfs show handler: print the node index for the ACL's active session.
+ * Prints 0 when sess_ops->SessionType is non-zero (presumably a discovery
+ * session — TODO confirm), ISCSI_NODE_INDEX otherwise.  Returns 0 bytes
+ * written when no session is currently bound to the ACL.
+ */
+static ssize_t iscsi_stat_sess_show_attr_node(
+       struct iscsi_node_stat_grps *igrps, char *page)
+{
+       struct iscsi_node_acl *acl = container_of(igrps,
+                       struct iscsi_node_acl, node_stat_grps);
+       struct se_node_acl *se_nacl = &acl->se_node_acl;
+       struct iscsi_session *sess;
+       struct se_session *se_sess;
+       ssize_t ret = 0;
+
+       /* nacl_sess_lock keeps nacl_sess stable while we dereference it */
+       spin_lock_bh(&se_nacl->nacl_sess_lock);
+       se_sess = se_nacl->nacl_sess;
+       if (se_sess) {
+               sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+               if (sess)
+                       ret = snprintf(page, PAGE_SIZE, "%u\n",
+                               sess->sess_ops->SessionType ? 0 : ISCSI_NODE_INDEX);
+       }
+       spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+       return ret;
+}
+ISCSI_STAT_SESS_RO(node);
+
+/*
+ * configfs show handler: print the session_index of the ACL's active
+ * session; writes nothing (returns 0) when no session is bound.
+ */
+static ssize_t iscsi_stat_sess_show_attr_indx(
+       struct iscsi_node_stat_grps *igrps, char *page)
+{
+       struct iscsi_node_acl *acl = container_of(igrps,
+                       struct iscsi_node_acl, node_stat_grps);
+       struct se_node_acl *se_nacl = &acl->se_node_acl;
+       struct iscsi_session *sess;
+       struct se_session *se_sess;
+       ssize_t ret = 0;
+
+       spin_lock_bh(&se_nacl->nacl_sess_lock);
+       se_sess = se_nacl->nacl_sess;
+       if (se_sess) {
+               sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+               if (sess)
+                       ret = snprintf(page, PAGE_SIZE, "%u\n",
+                                       sess->session_index);
+       }
+       spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+       return ret;
+}
+ISCSI_STAT_SESS_RO(indx);
+
+/* configfs show handler: print cmd_pdus for the ACL's active session, if any. */
+static ssize_t iscsi_stat_sess_show_attr_cmd_pdus(
+       struct iscsi_node_stat_grps *igrps, char *page)
+{
+       struct iscsi_node_acl *acl = container_of(igrps,
+                       struct iscsi_node_acl, node_stat_grps);
+       struct se_node_acl *se_nacl = &acl->se_node_acl;
+       struct iscsi_session *sess;
+       struct se_session *se_sess;
+       ssize_t ret = 0;
+
+       spin_lock_bh(&se_nacl->nacl_sess_lock);
+       se_sess = se_nacl->nacl_sess;
+       if (se_sess) {
+               sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+               if (sess)
+                       ret = snprintf(page, PAGE_SIZE, "%u\n", sess->cmd_pdus);
+       }
+       spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+       return ret;
+}
+ISCSI_STAT_SESS_RO(cmd_pdus);
+
+/* configfs show handler: print rsp_pdus for the ACL's active session, if any. */
+static ssize_t iscsi_stat_sess_show_attr_rsp_pdus(
+       struct iscsi_node_stat_grps *igrps, char *page)
+{
+       struct iscsi_node_acl *acl = container_of(igrps,
+                       struct iscsi_node_acl, node_stat_grps);
+       struct se_node_acl *se_nacl = &acl->se_node_acl;
+       struct iscsi_session *sess;
+       struct se_session *se_sess;
+       ssize_t ret = 0;
+
+       spin_lock_bh(&se_nacl->nacl_sess_lock);
+       se_sess = se_nacl->nacl_sess;
+       if (se_sess) {
+               sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+               if (sess)
+                       ret = snprintf(page, PAGE_SIZE, "%u\n", sess->rsp_pdus);
+       }
+       spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+       return ret;
+}
+ISCSI_STAT_SESS_RO(rsp_pdus);
+
+/*
+ * configfs show handler: print tx_data_octets (64-bit) for the ACL's
+ * active session; the cast keeps %llu portable across architectures.
+ */
+static ssize_t iscsi_stat_sess_show_attr_txdata_octs(
+       struct iscsi_node_stat_grps *igrps, char *page)
+{
+       struct iscsi_node_acl *acl = container_of(igrps,
+                       struct iscsi_node_acl, node_stat_grps);
+       struct se_node_acl *se_nacl = &acl->se_node_acl;
+       struct iscsi_session *sess;
+       struct se_session *se_sess;
+       ssize_t ret = 0;
+
+       spin_lock_bh(&se_nacl->nacl_sess_lock);
+       se_sess = se_nacl->nacl_sess;
+       if (se_sess) {
+               sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+               if (sess)
+                       ret = snprintf(page, PAGE_SIZE, "%llu\n",
+                               (unsigned long long)sess->tx_data_octets);
+       }
+       spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+       return ret;
+}
+ISCSI_STAT_SESS_RO(txdata_octs);
+
+/* configfs show handler: print rx_data_octets (64-bit) for the active session. */
+static ssize_t iscsi_stat_sess_show_attr_rxdata_octs(
+       struct iscsi_node_stat_grps *igrps, char *page)
+{
+       struct iscsi_node_acl *acl = container_of(igrps,
+                       struct iscsi_node_acl, node_stat_grps);
+       struct se_node_acl *se_nacl = &acl->se_node_acl;
+       struct iscsi_session *sess;
+       struct se_session *se_sess;
+       ssize_t ret = 0;
+
+       spin_lock_bh(&se_nacl->nacl_sess_lock);
+       se_sess = se_nacl->nacl_sess;
+       if (se_sess) {
+               sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+               if (sess)
+                       ret = snprintf(page, PAGE_SIZE, "%llu\n",
+                               (unsigned long long)sess->rx_data_octets);
+       }
+       spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+       return ret;
+}
+ISCSI_STAT_SESS_RO(rxdata_octs);
+
+/* configfs show handler: print conn_digest_errors for the active session. */
+static ssize_t iscsi_stat_sess_show_attr_conn_digest_errors(
+       struct iscsi_node_stat_grps *igrps, char *page)
+{
+       struct iscsi_node_acl *acl = container_of(igrps,
+                       struct iscsi_node_acl, node_stat_grps);
+       struct se_node_acl *se_nacl = &acl->se_node_acl;
+       struct iscsi_session *sess;
+       struct se_session *se_sess;
+       ssize_t ret = 0;
+
+       spin_lock_bh(&se_nacl->nacl_sess_lock);
+       se_sess = se_nacl->nacl_sess;
+       if (se_sess) {
+               sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+               if (sess)
+                       ret = snprintf(page, PAGE_SIZE, "%u\n",
+                                       sess->conn_digest_errors);
+       }
+       spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+       return ret;
+}
+ISCSI_STAT_SESS_RO(conn_digest_errors);
+
+/* configfs show handler: print conn_timeout_errors for the active session. */
+static ssize_t iscsi_stat_sess_show_attr_conn_timeout_errors(
+       struct iscsi_node_stat_grps *igrps, char *page)
+{
+       struct iscsi_node_acl *acl = container_of(igrps,
+                       struct iscsi_node_acl, node_stat_grps);
+       struct se_node_acl *se_nacl = &acl->se_node_acl;
+       struct iscsi_session *sess;
+       struct se_session *se_sess;
+       ssize_t ret = 0;
+
+       spin_lock_bh(&se_nacl->nacl_sess_lock);
+       se_sess = se_nacl->nacl_sess;
+       if (se_sess) {
+               sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+               if (sess)
+                       ret = snprintf(page, PAGE_SIZE, "%u\n",
+                                       sess->conn_timeout_errors);
+       }
+       spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+       return ret;
+}
+ISCSI_STAT_SESS_RO(conn_timeout_errors);
+
+/* Generate iscsi_stat_sess_attr_show()/iscsi_stat_sess_attr_store(). */
+CONFIGFS_EATTR_OPS(iscsi_stat_sess, iscsi_node_stat_grps,
+               iscsi_sess_stats_group);
+
+/* NULL-terminated attribute list for the per-node session stats group. */
+static struct configfs_attribute *iscsi_stat_sess_stats_attrs[] = {
+       &iscsi_stat_sess_inst.attr,
+       &iscsi_stat_sess_node.attr,
+       &iscsi_stat_sess_indx.attr,
+       &iscsi_stat_sess_cmd_pdus.attr,
+       &iscsi_stat_sess_rsp_pdus.attr,
+       &iscsi_stat_sess_txdata_octs.attr,
+       &iscsi_stat_sess_rxdata_octs.attr,
+       &iscsi_stat_sess_conn_digest_errors.attr,
+       &iscsi_stat_sess_conn_timeout_errors.attr,
+       NULL,
+};
+
+static struct configfs_item_operations iscsi_stat_sess_stats_item_ops = {
+       .show_attribute         = iscsi_stat_sess_attr_show,
+       .store_attribute        = iscsi_stat_sess_attr_store,
+};
+
+/* Item type registered for the per-node session stats group. */
+struct config_item_type iscsi_stat_sess_cit = {
+       .ct_item_ops            = &iscsi_stat_sess_stats_item_ops,
+       .ct_attrs               = iscsi_stat_sess_stats_attrs,
+       .ct_owner               = THIS_MODULE,
+};
diff --git a/drivers/target/iscsi/iscsi_target_stat.h b/drivers/target/iscsi/iscsi_target_stat.h
new file mode 100644 (file)
index 0000000..3ff76b4
--- /dev/null
@@ -0,0 +1,64 @@
+#ifndef ISCSI_TARGET_STAT_H
+#define ISCSI_TARGET_STAT_H
+
+/*
+ * For struct iscsi_tiqn->tiqn_wwn default groups
+ */
+extern struct config_item_type iscsi_stat_instance_cit;
+extern struct config_item_type iscsi_stat_sess_err_cit;
+extern struct config_item_type iscsi_stat_tgt_attr_cit;
+extern struct config_item_type iscsi_stat_login_cit;
+extern struct config_item_type iscsi_stat_logout_cit;
+
+/*
+ * For struct iscsi_session->se_sess default groups
+ */
+extern struct config_item_type iscsi_stat_sess_cit;
+
+/* iSCSI session error types */
+#define ISCSI_SESS_ERR_UNKNOWN         0
+#define ISCSI_SESS_ERR_DIGEST          1
+#define ISCSI_SESS_ERR_CXN_TIMEOUT     2
+#define ISCSI_SESS_ERR_PDU_FORMAT      3
+
+/* iSCSI session error stats */
+struct iscsi_sess_err_stats {
+       spinlock_t      lock;           /* protects all counters below */
+       u32             digest_errors;
+       u32             cxn_timeout_errors;
+       u32             pdu_format_errors;
+       u32             last_sess_failure_type; /* one of ISCSI_SESS_ERR_* */
+       /* initiator name of the last failed session; 224 presumably covers
+        * the maximum iSCSI name length plus NUL — TODO confirm */
+       char            last_sess_fail_rem_name[224];
+} ____cacheline_aligned;
+
+/* iSCSI login failure types (sub oids) */
+#define ISCSI_LOGIN_FAIL_OTHER         2
+#define ISCSI_LOGIN_FAIL_REDIRECT      3
+#define ISCSI_LOGIN_FAIL_AUTHORIZE     4
+#define ISCSI_LOGIN_FAIL_AUTHENTICATE  5
+#define ISCSI_LOGIN_FAIL_NEGOTIATE     6
+
+/* iSCSI login stats */
+struct iscsi_login_stats {
+       spinlock_t      lock;           /* protects all fields below */
+       u32             accepts;
+       u32             other_fails;
+       u32             redirects;
+       u32             authorize_fails;
+       u32             authenticate_fails;
+       u32             negotiate_fails;        /* used for notifications */
+       u64             last_fail_time;         /* time stamp (jiffies) */
+       u32             last_fail_type;         /* one of ISCSI_LOGIN_FAIL_* */
+       int             last_intr_fail_ip_family;
+       unsigned char   last_intr_fail_ip_addr[IPV6_ADDRESS_SPACE];
+       char            last_intr_fail_name[224];
+} ____cacheline_aligned;
+
+/* iSCSI logout stats */
+struct iscsi_logout_stats {
+       spinlock_t      lock;           /* protects both counters */
+       u32             normal_logouts;
+       u32             abnormal_logouts;
+} ____cacheline_aligned;
+
+#endif   /*** ISCSI_TARGET_STAT_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_tmr.c b/drivers/target/iscsi/iscsi_target_tmr.c
new file mode 100644 (file)
index 0000000..db1fe1e
--- /dev/null
@@ -0,0 +1,849 @@
+/*******************************************************************************
+ * This file contains the iSCSI Target specific Task Management functions.
+ *
+ * (c) Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <asm/unaligned.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_seq_pdu_list.h"
+#include "iscsi_target_datain_values.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_erl2.h"
+#include "iscsi_target_tmr.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+
+/*
+ *     Validate an incoming ABORT_TASK TMR.  Looks up the referenced command
+ *     by RefTaskTag and verifies that RefCmdSN matches the command's CmdSN.
+ *     On success, populates the se_tmr/tmr_req fields for the core to act
+ *     on.  Returns an ISCSI_TMF_RSP_* function response code.
+ */
+u8 iscsit_tmr_abort_task(
+       struct iscsi_cmd *cmd,
+       unsigned char *buf)
+{
+       struct iscsi_cmd *ref_cmd;
+       struct iscsi_conn *conn = cmd->conn;
+       struct iscsi_tmr_req *tmr_req = cmd->tmr_req;
+       struct se_tmr_req *se_tmr = cmd->se_cmd.se_tmr_req;
+       struct iscsi_tm *hdr = (struct iscsi_tm *) buf;
+
+       ref_cmd = iscsit_find_cmd_from_itt(conn, hdr->rtt);
+       if (!ref_cmd) {
+               /*
+                * No such task: if RefCmdSN falls inside the valid CmdSN
+                * window the command may simply never have arrived, so
+                * answer COMPLETE; otherwise report NO_TASK.
+                */
+               pr_err("Unable to locate RefTaskTag: 0x%08x on CID:"
+                       " %hu.\n", hdr->rtt, conn->cid);
+               return ((hdr->refcmdsn >= conn->sess->exp_cmd_sn) &&
+                       (hdr->refcmdsn <= conn->sess->max_cmd_sn)) ?
+                       ISCSI_TMF_RSP_COMPLETE : ISCSI_TMF_RSP_NO_TASK;
+       }
+       if (ref_cmd->cmd_sn != hdr->refcmdsn) {
+               pr_err("RefCmdSN 0x%08x does not equal"
+                       " task's CmdSN 0x%08x. Rejecting ABORT_TASK.\n",
+                       hdr->refcmdsn, ref_cmd->cmd_sn);
+               return ISCSI_TMF_RSP_REJECTED;
+       }
+
+       se_tmr->ref_task_tag            = hdr->rtt;
+       se_tmr->ref_cmd                 = &ref_cmd->se_cmd;
+       tmr_req->ref_cmd_sn             = hdr->refcmdsn;
+       tmr_req->exp_data_sn            = hdr->exp_datasn;
+
+       return ISCSI_TMF_RSP_COMPLETE;
+}
+
+/*
+ *     Called from iscsit_handle_task_mgt_cmd().
+ *
+ *     Authorization check for TARGET_WARM_RESET: returns 0 when the
+ *     initiator node's attributes permit it, -1 otherwise.  The reset
+ *     itself is performed later by transport_generic_do_tmr().
+ */
+int iscsit_tmr_task_warm_reset(
+       struct iscsi_conn *conn,
+       struct iscsi_tmr_req *tmr_req,
+       unsigned char *buf)
+{
+       struct iscsi_session *sess = conn->sess;
+       struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+
+       if (!na->tmr_warm_reset) {
+               pr_err("TMR Opcode TARGET_WARM_RESET authorization"
+                       " failed for Initiator Node: %s\n",
+                       sess->se_sess->se_node_acl->initiatorname);
+               return -1;
+       }
+       /*
+        * Do the real work in transport_generic_do_tmr().
+        */
+       return 0;
+}
+
+/*
+ *     Authorization check for TARGET_COLD_RESET: returns 0 when the
+ *     initiator node's attributes permit it, -1 otherwise.  The reset
+ *     itself is performed later by transport_generic_do_tmr().
+ */
+int iscsit_tmr_task_cold_reset(
+       struct iscsi_conn *conn,
+       struct iscsi_tmr_req *tmr_req,
+       unsigned char *buf)
+{
+       struct iscsi_session *sess = conn->sess;
+       struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+
+       if (!na->tmr_cold_reset) {
+               pr_err("TMR Opcode TARGET_COLD_RESET authorization"
+                       " failed for Initiator Node: %s\n",
+                       sess->se_sess->se_node_acl->initiatorname);
+               return -1;
+       }
+       /*
+        * Do the real work in transport_generic_do_tmr().
+        */
+       return 0;
+}
+
+/*
+ *     Validate a TASK_REASSIGN TMR (ERL=2 connection recovery).  Locates the
+ *     referenced command in the session's connection recovery lists and,
+ *     when acceptable, records reassignment state in se_tmr/tmr_req so the
+ *     command can be moved to the new connection after the TMR response is
+ *     sent (see iscsit_tmr_post_handler()).  Returns an ISCSI_TMF_RSP_* code.
+ */
+u8 iscsit_tmr_task_reassign(
+       struct iscsi_cmd *cmd,
+       unsigned char *buf)
+{
+       struct iscsi_cmd *ref_cmd = NULL;
+       struct iscsi_conn *conn = cmd->conn;
+       struct iscsi_conn_recovery *cr = NULL;
+       struct iscsi_tmr_req *tmr_req = cmd->tmr_req;
+       struct se_tmr_req *se_tmr = cmd->se_cmd.se_tmr_req;
+       struct iscsi_tm *hdr = (struct iscsi_tm *) buf;
+       int ret;
+
+       pr_debug("Got TASK_REASSIGN TMR ITT: 0x%08x,"
+               " RefTaskTag: 0x%08x, ExpDataSN: 0x%08x, CID: %hu\n",
+               hdr->itt, hdr->rtt, hdr->exp_datasn, conn->cid);
+
+       /* TASK_REASSIGN is only defined for ErrorRecoveryLevel 2 sessions */
+       if (conn->sess->sess_ops->ErrorRecoveryLevel != 2) {
+               pr_err("TMR TASK_REASSIGN not supported in ERL<2,"
+                               " ignoring request.\n");
+               return ISCSI_TMF_RSP_NOT_SUPPORTED;
+       }
+
+       /* -2: command still allegiant to its old CID; -1: not found at all */
+       ret = iscsit_find_cmd_for_recovery(conn->sess, &ref_cmd, &cr, hdr->rtt);
+       if (ret == -2) {
+               pr_err("Command ITT: 0x%08x is still alligent to CID:"
+                       " %hu\n", ref_cmd->init_task_tag, cr->cid);
+               return ISCSI_TMF_RSP_TASK_ALLEGIANT;
+       } else if (ret == -1) {
+               pr_err("Unable to locate RefTaskTag: 0x%08x in"
+                       " connection recovery command list.\n", hdr->rtt);
+               return ISCSI_TMF_RSP_NO_TASK;
+       }
+       /*
+        * Temporary check to prevent connection recovery for
+        * connections with a differing MaxRecvDataSegmentLength.
+        */
+       if (cr->maxrecvdatasegmentlength !=
+           conn->conn_ops->MaxRecvDataSegmentLength) {
+               pr_err("Unable to perform connection recovery for"
+                       " differing MaxRecvDataSegmentLength, rejecting"
+                       " TMR TASK_REASSIGN.\n");
+               return ISCSI_TMF_RSP_REJECTED;
+       }
+
+       se_tmr->ref_task_tag            = hdr->rtt;
+       se_tmr->ref_cmd                 = &ref_cmd->se_cmd;
+       se_tmr->ref_task_lun            = get_unaligned_le64(&hdr->lun);
+       tmr_req->ref_cmd_sn             = hdr->refcmdsn;
+       tmr_req->exp_data_sn            = hdr->exp_datasn;
+       tmr_req->conn_recovery          = cr;
+       tmr_req->task_reassign          = 1;
+       /*
+        * Command can now be reassigned to a new connection.
+        * The task management response must be sent before the
+        * reassignment actually happens.  See iscsi_tmr_post_handler().
+        */
+       return ISCSI_TMF_RSP_COMPLETE;
+}
+
+/*
+ *     Remove @cmd from its connection recovery entry; when that leaves the
+ *     entry empty (return 0 from the remove helper), retire the recovery
+ *     entry for the session as well.
+ */
+static void iscsit_task_reassign_remove_cmd(
+       struct iscsi_cmd *cmd,
+       struct iscsi_conn_recovery *cr,
+       struct iscsi_session *sess)
+{
+       int ret;
+
+       spin_lock(&cr->conn_recovery_cmd_lock);
+       ret = iscsit_remove_cmd_from_connection_recovery(cmd, sess);
+       spin_unlock(&cr->conn_recovery_cmd_lock);
+       if (!ret) {
+               pr_debug("iSCSI connection recovery successful for CID:"
+                       " %hu on SID: %u\n", cr->cid, sess->sid);
+               iscsit_remove_active_connection_recovery_entry(cr, sess);
+       }
+}
+
+/*
+ *     Reassign an outstanding NOP-OUT to the new connection: reset its
+ *     StatSN/ExpStatSN, pull it out of connection recovery, splice it onto
+ *     the new connection's command list and queue a NOPIN response.
+ *     Returns 0 on success, -1 if the command has no recovery entry.
+ */
+static int iscsit_task_reassign_complete_nop_out(
+       struct iscsi_tmr_req *tmr_req,
+       struct iscsi_conn *conn)
+{
+       struct se_tmr_req *se_tmr = tmr_req->se_tmr_req;
+       struct se_cmd *se_cmd = se_tmr->ref_cmd;
+       struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+       struct iscsi_conn_recovery *cr;
+
+       if (!cmd->cr) {
+               pr_err("struct iscsi_conn_recovery pointer for ITT: 0x%08x"
+                       " is NULL!\n", cmd->init_task_tag);
+               return -1;
+       }
+       cr = cmd->cr;
+
+       /*
+        * Reset the StatSN so a new one for this commands new connection
+        * will be assigned.
+        * Reset the ExpStatSN as well so we may receive Status SNACKs.
+        */
+       cmd->stat_sn = cmd->exp_stat_sn = 0;
+
+       iscsit_task_reassign_remove_cmd(cmd, cr, conn->sess);
+
+       spin_lock_bh(&conn->cmd_lock);
+       list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+       spin_unlock_bh(&conn->cmd_lock);
+
+       cmd->i_state = ISTATE_SEND_NOPIN;
+       iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+       return 0;
+}
+
+/*
+ *     Resume a reassigned WRITE on its new connection: restore DataACK
+ *     SNACK state from the TMR's ExpDataSN, then either complete the
+ *     command (all DataOUT already received), re-open the unsolicited
+ *     data window, and/or rebuild outstanding R2Ts.
+ *     Returns 0 on success, negative on failure.
+ */
+static int iscsit_task_reassign_complete_write(
+       struct iscsi_cmd *cmd,
+       struct iscsi_tmr_req *tmr_req)
+{
+       int no_build_r2ts = 0;
+       u32 length = 0, offset = 0;
+       struct iscsi_conn *conn = cmd->conn;
+       struct se_cmd *se_cmd = &cmd->se_cmd;
+       /*
+        * The Initiator must not send a R2T SNACK with a BegRun less than
+        * the TMR TASK_REASSIGN's ExpDataSN.
+        */
+       if (!tmr_req->exp_data_sn) {
+               cmd->cmd_flags &= ~ICF_GOT_DATACK_SNACK;
+               cmd->acked_data_sn = 0;
+       } else {
+               cmd->cmd_flags |= ICF_GOT_DATACK_SNACK;
+               cmd->acked_data_sn = (tmr_req->exp_data_sn - 1);
+       }
+
+       /*
+        * The TMR TASK_REASSIGN's ExpDataSN contains the next R2TSN the
+        * Initiator is expecting.  The Target controls all WRITE operations
+        * so if we have received all DataOUT we can safely ignore Initiator.
+        */
+       if (cmd->cmd_flags & ICF_GOT_LAST_DATAOUT) {
+               if (!atomic_read(&cmd->transport_sent)) {
+                       pr_debug("WRITE ITT: 0x%08x: t_state: %d"
+                               " never sent to transport\n",
+                               cmd->init_task_tag, cmd->se_cmd.t_state);
+                       return transport_generic_handle_data(se_cmd);
+               }
+
+               cmd->i_state = ISTATE_SEND_STATUS;
+               iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+               return 0;
+       }
+
+       /*
+        * Special case to deal with DataSequenceInOrder=No and Non-Immediate
+        * Unsolicited DataOut.
+        */
+       if (cmd->unsolicited_data) {
+               cmd->unsolicited_data = 0;
+
+               offset = cmd->next_burst_len = cmd->write_data_done;
+
+               /* Remaining FirstBurst covers the whole transfer: no R2Ts needed */
+               if ((conn->sess->sess_ops->FirstBurstLength - offset) >=
+                    cmd->data_length) {
+                       no_build_r2ts = 1;
+                       length = (cmd->data_length - offset);
+               } else
+                       length = (conn->sess->sess_ops->FirstBurstLength - offset);
+
+               spin_lock_bh(&cmd->r2t_lock);
+               if (iscsit_add_r2t_to_list(cmd, offset, length, 0, 0) < 0) {
+                       spin_unlock_bh(&cmd->r2t_lock);
+                       return -1;
+               }
+               cmd->outstanding_r2ts++;
+               spin_unlock_bh(&cmd->r2t_lock);
+
+               if (no_build_r2ts)
+                       return 0;
+       }
+       /*
+        * iscsit_build_r2ts_for_cmd() can handle the rest from here.
+        */
+       return iscsit_build_r2ts_for_cmd(cmd, conn, 2);
+}
+
+/*
+ *     Resume a reassigned READ on its new connection: restore DataACK
+ *     SNACK state, hand the CDB back to the transport if it was never
+ *     dispatched, and otherwise queue recovery DataIN starting at the
+ *     TMR's ExpDataSN.  Returns 0 on success, -1 on failure.
+ */
+static int iscsit_task_reassign_complete_read(
+       struct iscsi_cmd *cmd,
+       struct iscsi_tmr_req *tmr_req)
+{
+       struct iscsi_conn *conn = cmd->conn;
+       struct iscsi_datain_req *dr;
+       struct se_cmd *se_cmd = &cmd->se_cmd;
+       /*
+        * The Initiator must not send a Data SNACK with a BegRun less than
+        * the TMR TASK_REASSIGN's ExpDataSN.
+        */
+       if (!tmr_req->exp_data_sn) {
+               cmd->cmd_flags &= ~ICF_GOT_DATACK_SNACK;
+               cmd->acked_data_sn = 0;
+       } else {
+               cmd->cmd_flags |= ICF_GOT_DATACK_SNACK;
+               cmd->acked_data_sn = (tmr_req->exp_data_sn - 1);
+       }
+
+       if (!atomic_read(&cmd->transport_sent)) {
+               pr_debug("READ ITT: 0x%08x: t_state: %d never sent to"
+                       " transport\n", cmd->init_task_tag,
+                       cmd->se_cmd.t_state);
+               transport_generic_handle_cdb(se_cmd);
+               return 0;
+       }
+
+       /* Dispatched but never completed by the transport: cannot recover */
+       if (!atomic_read(&se_cmd->t_transport_complete)) {
+               pr_err("READ ITT: 0x%08x: t_state: %d, never returned"
+                       " from transport\n", cmd->init_task_tag,
+                       cmd->se_cmd.t_state);
+               return -1;
+       }
+
+       dr = iscsit_allocate_datain_req();
+       if (!dr)
+               return -1;
+       /*
+        * The TMR TASK_REASSIGN's ExpDataSN contains the next DataSN the
+        * Initiator is expecting.
+        */
+       dr->data_sn = dr->begrun = tmr_req->exp_data_sn;
+       dr->runlength = 0;
+       dr->generate_recovery_values = 1;
+       dr->recovery = DATAIN_CONNECTION_RECOVERY;
+
+       iscsit_attach_datain_req(cmd, dr);
+
+       cmd->i_state = ISTATE_SEND_DATAIN;
+       iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+       return 0;
+}
+
+/*
+ *     Resume a reassigned command with no data phase: simply queue its
+ *     status response on the new connection.  Always returns 0.
+ */
+static int iscsit_task_reassign_complete_none(
+       struct iscsi_cmd *cmd,
+       struct iscsi_tmr_req *tmr_req)
+{
+       struct iscsi_conn *conn = cmd->conn;
+
+       cmd->i_state = ISTATE_SEND_STATUS;
+       iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+       return 0;
+}
+
+/*
+ *     Reassign an outstanding SCSI command to the new connection: reset
+ *     StatSN/ExpStatSN, pull it out of connection recovery, splice it onto
+ *     the new connection's command list, then resume it according to its
+ *     data direction (or just resend status if a CHECK_CONDITION was
+ *     already sent).  Returns 0 on success, negative on failure.
+ */
+static int iscsit_task_reassign_complete_scsi_cmnd(
+       struct iscsi_tmr_req *tmr_req,
+       struct iscsi_conn *conn)
+{
+       struct se_tmr_req *se_tmr = tmr_req->se_tmr_req;
+       struct se_cmd *se_cmd = se_tmr->ref_cmd;
+       struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+       struct iscsi_conn_recovery *cr;
+
+       if (!cmd->cr) {
+               pr_err("struct iscsi_conn_recovery pointer for ITT: 0x%08x"
+                       " is NULL!\n", cmd->init_task_tag);
+               return -1;
+       }
+       cr = cmd->cr;
+
+       /*
+        * Reset the StatSN so a new one for this commands new connection
+        * will be assigned.
+        * Reset the ExpStatSN as well so we may receive Status SNACKs.
+        */
+       cmd->stat_sn = cmd->exp_stat_sn = 0;
+
+       iscsit_task_reassign_remove_cmd(cmd, cr, conn->sess);
+
+       spin_lock_bh(&conn->cmd_lock);
+       list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+       spin_unlock_bh(&conn->cmd_lock);
+
+       if (se_cmd->se_cmd_flags & SCF_SENT_CHECK_CONDITION) {
+               cmd->i_state = ISTATE_SEND_STATUS;
+               iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+               return 0;
+       }
+
+       switch (cmd->data_direction) {
+       case DMA_TO_DEVICE:
+               return iscsit_task_reassign_complete_write(cmd, tmr_req);
+       case DMA_FROM_DEVICE:
+               return iscsit_task_reassign_complete_read(cmd, tmr_req);
+       case DMA_NONE:
+               return iscsit_task_reassign_complete_none(cmd, tmr_req);
+       default:
+               pr_err("Unknown cmd->data_direction: 0x%02x\n",
+                               cmd->data_direction);
+               return -1;
+       }
+}
+
+/*
+ *     Complete a TASK_REASSIGN after the TMR response has been sent: bind
+ *     the referenced command to the new connection and dispatch to the
+ *     opcode-specific reassignment handler.  Returns 0 on success,
+ *     negative on failure.
+ */
+static int iscsit_task_reassign_complete(
+       struct iscsi_tmr_req *tmr_req,
+       struct iscsi_conn *conn)
+{
+       struct se_tmr_req *se_tmr = tmr_req->se_tmr_req;
+       struct se_cmd *se_cmd;
+       struct iscsi_cmd *cmd;
+       int ret = 0;
+
+       if (!se_tmr->ref_cmd) {
+               pr_err("TMR Request is missing a RefCmd struct iscsi_cmd.\n");
+               return -1;
+       }
+       se_cmd = se_tmr->ref_cmd;
+       cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+       /* Re-home the command onto the connection that issued the TMR */
+       cmd->conn = conn;
+
+       switch (cmd->iscsi_opcode) {
+       case ISCSI_OP_NOOP_OUT:
+               ret = iscsit_task_reassign_complete_nop_out(tmr_req, conn);
+               break;
+       case ISCSI_OP_SCSI_CMD:
+               ret = iscsit_task_reassign_complete_scsi_cmnd(tmr_req, conn);
+               break;
+       default:
+                pr_err("Illegal iSCSI Opcode 0x%02x during"
+                       " command realligence\n", cmd->iscsi_opcode);
+               return -1;
+       }
+
+       if (ret != 0)
+               return ret;
+
+       pr_debug("Completed connection realligence for Opcode: 0x%02x,"
+               " ITT: 0x%08x to CID: %hu.\n", cmd->iscsi_opcode,
+                       cmd->init_task_tag, conn->cid);
+
+       return 0;
+}
+
+/*
+ *     Handles special after-the-fact actions related to TMRs.
+ *     Right now the only one that it's really needed for is
+ *     connection recovery related TASK_REASSIGN.
+ *
+ *     NOTE(review): the 'extern' on this function definition is
+ *     redundant in C and could be dropped.
+ */
+extern int iscsit_tmr_post_handler(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
+{
+       struct iscsi_tmr_req *tmr_req = cmd->tmr_req;
+       struct se_tmr_req *se_tmr = cmd->se_cmd.se_tmr_req;
+
+       /* Only a successfully completed TASK_REASSIGN needs follow-up. */
+       if (tmr_req->task_reassign &&
+          (se_tmr->response == ISCSI_TMF_RSP_COMPLETE))
+               return iscsit_task_reassign_complete(tmr_req, conn);
+
+       return 0;
+}
+
+/*
+ *     Nothing to do here, but leave it for good measure. :-)
+ *     READ reallegiance needs no state rewinding on this side; the
+ *     caller has already validated ExpDataSN against the command's
+ *     DataSN before invoking this.
+ */
+int iscsit_task_reassign_prepare_read(
+       struct iscsi_tmr_req *tmr_req,
+       struct iscsi_conn *conn)
+{
+       return 0;
+}
+
+/*
+ * Rewind a command's unsolicited DataOUT accounting after a connection
+ * failure, so the missing first-burst data is re-requested once
+ * TASK_REASSIGN completes.  DataOUT PDUs previously received OK within
+ * the affected range are marked ISCSI_PDU_NOT_RECEIVED, and
+ * write_data_done / first_burst_len are adjusted to match.
+ */
+static void iscsit_task_reassign_prepare_unsolicited_dataout(
+       struct iscsi_cmd *cmd,
+       struct iscsi_conn *conn)
+{
+       int i, j;
+       struct iscsi_pdu *pdu = NULL;
+       struct iscsi_seq *seq = NULL;
+
+       /* DataSequenceInOrder=Yes: single unsolicited burst at offset 0. */
+       if (conn->sess->sess_ops->DataSequenceInOrder) {
+               cmd->data_sn = 0;
+
+               if (cmd->immediate_data)
+                       cmd->r2t_offset += (cmd->first_burst_len -
+                               cmd->seq_start_offset);
+
+               if (conn->sess->sess_ops->DataPDUInOrder) {
+                       cmd->write_data_done -= (cmd->immediate_data) ?
+                                               (cmd->first_burst_len -
+                                                cmd->seq_start_offset) :
+                                                cmd->first_burst_len;
+                       cmd->first_burst_len = 0;
+                       return;
+               }
+
+               /*
+                * DataPDUInOrder=No: PDU offsets may be arbitrary, so walk
+                * the whole PDU list and invalidate those inside the
+                * unsolicited sequence window.
+                */
+               for (i = 0; i < cmd->pdu_count; i++) {
+                       pdu = &cmd->pdu_list[i];
+
+                       if (pdu->status != ISCSI_PDU_RECEIVED_OK)
+                               continue;
+
+                       if ((pdu->offset >= cmd->seq_start_offset) &&
+                          ((pdu->offset + pdu->length) <=
+                            cmd->seq_end_offset)) {
+                               cmd->first_burst_len -= pdu->length;
+                               cmd->write_data_done -= pdu->length;
+                               pdu->status = ISCSI_PDU_NOT_RECEIVED;
+                       }
+               }
+       } else {
+               /*
+                * DataSequenceInOrder=No: reset every unsolicited sequence
+                * back to its original offset so it is re-received whole.
+                */
+               for (i = 0; i < cmd->seq_count; i++) {
+                       seq = &cmd->seq_list[i];
+
+                       if (seq->type != SEQTYPE_UNSOLICITED)
+                               continue;
+
+                       cmd->write_data_done -=
+                                       (seq->offset - seq->orig_offset);
+                       cmd->first_burst_len = 0;
+                       seq->data_sn = 0;
+                       seq->offset = seq->orig_offset;
+                       seq->next_burst_len = 0;
+                       seq->status = DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY;
+
+                       if (conn->sess->sess_ops->DataPDUInOrder)
+                               continue;
+
+                       for (j = 0; j < seq->pdu_count; j++) {
+                               pdu = &cmd->pdu_list[j+seq->pdu_start];
+
+                               if (pdu->status != ISCSI_PDU_RECEIVED_OK)
+                                       continue;
+
+                               pdu->status = ISCSI_PDU_NOT_RECEIVED;
+                       }
+               }
+       }
+}
+
+/*
+ * Prepare a WRITE command for connection reallegiance.  Two passes over
+ * the command's R2T list under cmd->r2t_lock:
+ *
+ *   1) R2TSNs below the TMR's ExpDataSN: verify their DataOUT sequences
+ *      completed; rewind accounting for any that did not so they are
+ *      re-requested.
+ *   2) R2TSNs at or above ExpDataSN: drop them (they are unacknowledged)
+ *      and rewind accounting so iscsit_build_r2ts_for_cmd() reissues
+ *      them after the TASK_REASSIGN response.
+ *
+ * Returns 0 on success, -1 on protocol error (empty R2T list, or a
+ * missing sequence descriptor).
+ */
+int iscsit_task_reassign_prepare_write(
+       struct iscsi_tmr_req *tmr_req,
+       struct iscsi_conn *conn)
+{
+       struct se_tmr_req *se_tmr = tmr_req->se_tmr_req;
+       struct se_cmd *se_cmd = se_tmr->ref_cmd;
+       struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+       struct iscsi_pdu *pdu = NULL;
+       struct iscsi_r2t *r2t = NULL, *r2t_tmp;
+       int first_incomplete_r2t = 1, i = 0;
+
+       /*
+        * The command was in the process of receiving Unsolicited DataOUT when
+        * the connection failed.
+        */
+       if (cmd->unsolicited_data)
+               iscsit_task_reassign_prepare_unsolicited_dataout(cmd, conn);
+
+       /*
+        * The Initiator is requesting R2Ts starting from zero,  skip
+        * checking acknowledged R2Ts and start checking struct iscsi_r2ts
+        * greater than zero.
+        */
+       if (!tmr_req->exp_data_sn)
+               goto drop_unacknowledged_r2ts;
+
+       /*
+        * We now check that the PDUs in DataOUT sequences below
+        * the TMR TASK_REASSIGN ExpDataSN (R2TSN the Initiator is
+        * expecting next) have all the DataOUT they require to complete
+        * the DataOUT sequence.  First scan from R2TSN 0 to TMR
+        * TASK_REASSIGN ExpDataSN-1.
+        *
+        * If we have not received all DataOUT in question,  we must
+        * make sure to make the appropriate changes to values in
+        * struct iscsi_cmd (and elsewhere depending on session parameters)
+        * so iscsit_build_r2ts_for_cmd() in iscsit_task_reassign_complete_write()
+        * will resend a new R2T for the DataOUT sequences in question.
+        */
+       spin_lock_bh(&cmd->r2t_lock);
+       if (list_empty(&cmd->cmd_r2t_list)) {
+               spin_unlock_bh(&cmd->r2t_lock);
+               return -1;
+       }
+
+       list_for_each_entry(r2t, &cmd->cmd_r2t_list, r2t_list) {
+
+               if (r2t->r2t_sn >= tmr_req->exp_data_sn)
+                       continue;
+               /*
+                * Safely ignore Recovery R2Ts and R2Ts that have completed
+                * DataOUT sequences.
+                */
+               if (r2t->seq_complete)
+                       continue;
+
+               if (r2t->recovery_r2t)
+                       continue;
+
+               /*
+                *                 DataSequenceInOrder=Yes:
+                *
+                * Taking into account the iSCSI implementation requirement of
+                * MaxOutstandingR2T=1 while ErrorRecoveryLevel>0 and
+                * DataSequenceInOrder=Yes, we must take into consideration
+                * the following:
+                *
+                *                  DataSequenceInOrder=No:
+                *
+                * Taking into account that the Initiator controls the (possibly
+                * random) PDU Order in (possibly random) Sequence Order of
+                * DataOUT the target requests with R2Ts,  we must take into
+                * consideration the following:
+                *
+                *      DataPDUInOrder=Yes for DataSequenceInOrder=[Yes,No]:
+                *
+                * While processing non-complete R2T DataOUT sequence requests
+                * the Target will re-request only the total sequence length
+                * minus current received offset.  This is because we must
+                * assume the initiator will continue sending DataOUT from the
+                * last PDU before the connection failed.
+                *
+                *      DataPDUInOrder=No for DataSequenceInOrder=[Yes,No]:
+                *
+                * While processing non-complete R2T DataOUT sequence requests
+                * the Target will re-request the entire DataOUT sequence if
+                * any single PDU is missing from the sequence.  This is because
+                * we have no logical method to determine the next PDU offset,
+                * and we must assume the Initiator will be sending any random
+                * PDU offset in the current sequence after TASK_REASSIGN
+                * has completed.
+                */
+               if (conn->sess->sess_ops->DataSequenceInOrder) {
+                       if (!first_incomplete_r2t) {
+                               cmd->r2t_offset -= r2t->xfer_len;
+                               goto next;
+                       }
+
+                       if (conn->sess->sess_ops->DataPDUInOrder) {
+                               cmd->data_sn = 0;
+                               cmd->r2t_offset -= (r2t->xfer_len -
+                                       cmd->next_burst_len);
+                               first_incomplete_r2t = 0;
+                               goto next;
+                       }
+
+                       cmd->data_sn = 0;
+                       cmd->r2t_offset -= r2t->xfer_len;
+
+                       for (i = 0; i < cmd->pdu_count; i++) {
+                               pdu = &cmd->pdu_list[i];
+
+                               if (pdu->status != ISCSI_PDU_RECEIVED_OK)
+                                       continue;
+
+                               if ((pdu->offset >= r2t->offset) &&
+                                   (pdu->offset < (r2t->offset +
+                                               r2t->xfer_len))) {
+                                       cmd->next_burst_len -= pdu->length;
+                                       cmd->write_data_done -= pdu->length;
+                                       pdu->status = ISCSI_PDU_NOT_RECEIVED;
+                               }
+                       }
+
+                       first_incomplete_r2t = 0;
+               } else {
+                       struct iscsi_seq *seq;
+
+                       seq = iscsit_get_seq_holder(cmd, r2t->offset,
+                                       r2t->xfer_len);
+                       if (!seq) {
+                               spin_unlock_bh(&cmd->r2t_lock);
+                               return -1;
+                       }
+
+                       cmd->write_data_done -=
+                                       (seq->offset - seq->orig_offset);
+                       seq->data_sn = 0;
+                       seq->offset = seq->orig_offset;
+                       seq->next_burst_len = 0;
+                       seq->status = DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY;
+
+                       cmd->seq_send_order--;
+
+                       if (conn->sess->sess_ops->DataPDUInOrder)
+                               goto next;
+
+                       for (i = 0; i < seq->pdu_count; i++) {
+                               pdu = &cmd->pdu_list[i+seq->pdu_start];
+
+                               if (pdu->status != ISCSI_PDU_RECEIVED_OK)
+                                       continue;
+
+                               pdu->status = ISCSI_PDU_NOT_RECEIVED;
+                       }
+               }
+
+next:
+               cmd->outstanding_r2ts--;
+       }
+       spin_unlock_bh(&cmd->r2t_lock);
+
+       /*
+        * We now drop all unacknowledged R2Ts, ie: ExpDataSN from TMR
+        * TASK_REASSIGN to the last R2T in the list..  We are also careful
+        * to check that the Initiator is not requesting R2Ts for DataOUT
+        * sequences it has already completed.
+        *
+        * Free each R2T in question and adjust values in struct iscsi_cmd
+        * accordingly so iscsit_build_r2ts_for_cmd() do the rest of
+        * the work after the TMR TASK_REASSIGN Response is sent.
+        */
+drop_unacknowledged_r2ts:
+
+       cmd->cmd_flags &= ~ICF_SENT_LAST_R2T;
+       cmd->r2t_sn = tmr_req->exp_data_sn;
+
+       spin_lock_bh(&cmd->r2t_lock);
+       list_for_each_entry_safe(r2t, r2t_tmp, &cmd->cmd_r2t_list, r2t_list) {
+               /*
+                * Skip up to the R2T Sequence number provided by the
+                * iSCSI TASK_REASSIGN TMR
+                */
+               if (r2t->r2t_sn < tmr_req->exp_data_sn)
+                       continue;
+
+               if (r2t->seq_complete) {
+                       pr_err("Initiator is requesting R2Ts from"
+                               " R2TSN: 0x%08x, but R2TSN: 0x%08x, Offset: %u,"
+                               " Length: %u is already complete."
+                               "   BAD INITIATOR ERL=2 IMPLEMENTATION!\n",
+                               tmr_req->exp_data_sn, r2t->r2t_sn,
+                               r2t->offset, r2t->xfer_len);
+                       spin_unlock_bh(&cmd->r2t_lock);
+                       return -1;
+               }
+
+               if (r2t->recovery_r2t) {
+                       iscsit_free_r2t(r2t, cmd);
+                       continue;
+               }
+
+               /*                 DataSequenceInOrder=Yes:
+                *
+                * Taking into account the iSCSI implementation requirement of
+                * MaxOutstandingR2T=1 while ErrorRecoveryLevel>0 and
+                * DataSequenceInOrder=Yes, it's safe to subtract the R2Ts
+                * entire transfer length from the commands R2T offset marker.
+                *
+                *                 DataSequenceInOrder=No:
+                *
+                * We subtract the difference from struct iscsi_seq between the
+                * current offset and original offset from cmd->write_data_done
+                * for account for DataOUT PDUs already received.  Then reset
+                * the current offset to the original and zero out the current
+                * burst length,  to make sure we re-request the entire DataOUT
+                * sequence.
+                */
+               if (conn->sess->sess_ops->DataSequenceInOrder)
+                       cmd->r2t_offset -= r2t->xfer_len;
+               else
+                       cmd->seq_send_order--;
+
+               cmd->outstanding_r2ts--;
+               iscsit_free_r2t(r2t, cmd);
+       }
+       spin_unlock_bh(&cmd->r2t_lock);
+
+       return 0;
+}
+
+/*
+ *     Performs sanity checks on a TMR TASK_REASSIGN's ExpDataSN for
+ *     a given struct iscsi_cmd, then dispatches to the READ or WRITE
+ *     preparation routine.  Returns 0 on success (or when no check
+ *     applies), -1 on protocol error.
+ */
+int iscsit_check_task_reassign_expdatasn(
+       struct iscsi_tmr_req *tmr_req,
+       struct iscsi_conn *conn)
+{
+       struct se_tmr_req *se_tmr = tmr_req->se_tmr_req;
+       struct se_cmd *se_cmd = se_tmr->ref_cmd;
+       struct iscsi_cmd *ref_cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+       /* ExpDataSN only has meaning for data-carrying SCSI commands. */
+       if (ref_cmd->iscsi_opcode != ISCSI_OP_SCSI_CMD)
+               return 0;
+
+       if (se_cmd->se_cmd_flags & SCF_SENT_CHECK_CONDITION)
+               return 0;
+
+       if (ref_cmd->data_direction == DMA_NONE)
+               return 0;
+
+       /*
+        * For READs the TMR TASK_REASSIGNs ExpDataSN contains the next DataSN
+        * of DataIN the Initiator is expecting.
+        *
+        * Also check that the Initiator is not re-requesting DataIN that has
+        * already been acknowledged with a DataAck SNACK.
+        */
+       if (ref_cmd->data_direction == DMA_FROM_DEVICE) {
+               if (tmr_req->exp_data_sn > ref_cmd->data_sn) {
+                       pr_err("Received ExpDataSN: 0x%08x for READ"
+                               " in TMR TASK_REASSIGN greater than command's"
+                               " DataSN: 0x%08x.\n", tmr_req->exp_data_sn,
+                               ref_cmd->data_sn);
+                       return -1;
+               }
+               if ((ref_cmd->cmd_flags & ICF_GOT_DATACK_SNACK) &&
+                   (tmr_req->exp_data_sn <= ref_cmd->acked_data_sn)) {
+                       pr_err("Received ExpDataSN: 0x%08x for READ"
+                               " in TMR TASK_REASSIGN for previously"
+                               " acknowledged DataIN: 0x%08x,"
+                               " protocol error\n", tmr_req->exp_data_sn,
+                               ref_cmd->acked_data_sn);
+                       return -1;
+               }
+               return iscsit_task_reassign_prepare_read(tmr_req, conn);
+       }
+
+       /*
+        * For WRITEs the TMR TASK_REASSIGNs ExpDataSN contains the next R2TSN
+        * for R2Ts the Initiator is expecting.
+        *
+        * Do the magic in iscsit_task_reassign_prepare_write().
+        */
+       if (ref_cmd->data_direction == DMA_TO_DEVICE) {
+               if (tmr_req->exp_data_sn > ref_cmd->r2t_sn) {
+                       pr_err("Received ExpDataSN: 0x%08x for WRITE"
+                               " in TMR TASK_REASSIGN greater than command's"
+                               " R2TSN: 0x%08x.\n", tmr_req->exp_data_sn,
+                                       ref_cmd->r2t_sn);
+                       return -1;
+               }
+               return iscsit_task_reassign_prepare_write(tmr_req, conn);
+       }
+
+       pr_err("Unknown iSCSI data_direction: 0x%02x\n",
+                       ref_cmd->data_direction);
+
+       return -1;
+}
diff --git a/drivers/target/iscsi/iscsi_target_tmr.h b/drivers/target/iscsi/iscsi_target_tmr.h
new file mode 100644 (file)
index 0000000..142e992
--- /dev/null
@@ -0,0 +1,14 @@
+#ifndef ISCSI_TARGET_TMR_H
+#define ISCSI_TARGET_TMR_H
+
+extern u8 iscsit_tmr_abort_task(struct iscsi_cmd *, unsigned char *);
+extern int iscsit_tmr_task_warm_reset(struct iscsi_conn *, struct iscsi_tmr_req *,
+                       unsigned char *);
+extern int iscsit_tmr_task_cold_reset(struct iscsi_conn *, struct iscsi_tmr_req *,
+                       unsigned char *);
+extern u8 iscsit_tmr_task_reassign(struct iscsi_cmd *, unsigned char *);
+extern int iscsit_tmr_post_handler(struct iscsi_cmd *, struct iscsi_conn *);
+extern int iscsit_check_task_reassign_expdatasn(struct iscsi_tmr_req *,
+                       struct iscsi_conn *);
+
+#endif /* ISCSI_TARGET_TMR_H */
diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c
new file mode 100644 (file)
index 0000000..d4cf2cd
--- /dev/null
@@ -0,0 +1,759 @@
+/*******************************************************************************
+ * This file contains iSCSI Target Portal Group related functions.
+ *
+ * (c) Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+#include <target/target_core_fabric_ops.h>
+#include <target/target_core_configfs.h>
+#include <target/target_core_tpg.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_login.h"
+#include "iscsi_target_nodeattrib.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+#include "iscsi_target_parameters.h"
+
+/*
+ * Allocate and initialize a struct iscsi_portal_group with tag @tpgt,
+ * owned by @tiqn (may be NULL for the discovery TPG).  The TPG starts
+ * in TPG_STATE_FREE.  Returns NULL on allocation failure; the caller
+ * owns the returned memory.
+ */
+struct iscsi_portal_group *iscsit_alloc_portal_group(struct iscsi_tiqn *tiqn, u16 tpgt)
+{
+       struct iscsi_portal_group *tpg;
+
+       tpg = kzalloc(sizeof(struct iscsi_portal_group), GFP_KERNEL);
+       if (!tpg) {
+               pr_err("Unable to allocate struct iscsi_portal_group\n");
+               return NULL;
+       }
+
+       tpg->tpgt = tpgt;
+       tpg->tpg_state = TPG_STATE_FREE;
+       tpg->tpg_tiqn = tiqn;
+       INIT_LIST_HEAD(&tpg->tpg_gnp_list);
+       INIT_LIST_HEAD(&tpg->tpg_list);
+       mutex_init(&tpg->tpg_access_lock);
+       mutex_init(&tpg->np_login_lock);
+       spin_lock_init(&tpg->tpg_state_lock);
+       spin_lock_init(&tpg->tpg_np_lock);
+
+       return tpg;
+}
+
+static void iscsit_set_default_tpg_attribs(struct iscsi_portal_group *);
+
+/*
+ * Create and activate the global discovery TPG (TPGT 1, no owning tiqn)
+ * and publish it through iscsit_global->discovery_tpg.  Authentication
+ * is disabled by default for discovery sessions.  Returns 0 on success,
+ * -1 on any failure.
+ *
+ * NOTE(review): on the 'out' error path tpg->param_list appears to be
+ * leaked when iscsi_create_default_params() succeeded but a later step
+ * failed — confirm whether iscsi_release_param_list() is needed here.
+ * Also, 'tpg->sid == 1' is always true at 'out' since sid is set
+ * unconditionally above.
+ */
+int iscsit_load_discovery_tpg(void)
+{
+       struct iscsi_param *param;
+       struct iscsi_portal_group *tpg;
+       int ret;
+
+       tpg = iscsit_alloc_portal_group(NULL, 1);
+       if (!tpg) {
+               pr_err("Unable to allocate struct iscsi_portal_group\n");
+               return -1;
+       }
+
+       ret = core_tpg_register(
+                       &lio_target_fabric_configfs->tf_ops,
+                       NULL, &tpg->tpg_se_tpg, (void *)tpg,
+                       TRANSPORT_TPG_TYPE_DISCOVERY);
+       if (ret < 0) {
+               kfree(tpg);
+               return -1;
+       }
+
+       tpg->sid = 1; /* First Assigned LIO Session ID */
+       iscsit_set_default_tpg_attribs(tpg);
+
+       if (iscsi_create_default_params(&tpg->param_list) < 0)
+               goto out;
+       /*
+        * By default we disable authentication for discovery sessions,
+        * this can be changed with:
+        *
+        * /sys/kernel/config/target/iscsi/discovery_auth/enforce_discovery_auth
+        */
+       param = iscsi_find_param_from_key(AUTHMETHOD, tpg->param_list);
+       if (!param)
+               goto out;
+
+       if (iscsi_update_param_value(param, "CHAP,None") < 0)
+               goto out;
+
+       tpg->tpg_attrib.authentication = 0;
+
+       spin_lock(&tpg->tpg_state_lock);
+       tpg->tpg_state  = TPG_STATE_ACTIVE;
+       spin_unlock(&tpg->tpg_state_lock);
+
+       iscsit_global->discovery_tpg = tpg;
+       pr_debug("CORE[0] - Allocated Discovery TPG\n");
+
+       return 0;
+out:
+       if (tpg->sid == 1)
+               core_tpg_deregister(&tpg->tpg_se_tpg);
+       kfree(tpg);
+       return -1;
+}
+
+/*
+ * Tear down the global discovery TPG created by
+ * iscsit_load_discovery_tpg(): deregister it from the target core,
+ * free it, and clear iscsit_global->discovery_tpg.  No-op if it was
+ * never created.
+ */
+void iscsit_release_discovery_tpg(void)
+{
+       struct iscsi_portal_group *tpg = iscsit_global->discovery_tpg;
+
+       if (!tpg)
+               return;
+
+       core_tpg_deregister(&tpg->tpg_se_tpg);
+
+       kfree(tpg);
+       iscsit_global->discovery_tpg = NULL;
+}
+
+/*
+ * Find the first non-FREE portal group under @tiqn that exports the
+ * network portal @np.  Returns the TPG, or NULL if none matches.
+ * NOTE(review): the TPG is returned after the locks are dropped, with
+ * no reference taken — the caller is presumably protected by a
+ * higher-level guarantee; confirm against callers.
+ */
+struct iscsi_portal_group *iscsit_get_tpg_from_np(
+       struct iscsi_tiqn *tiqn,
+       struct iscsi_np *np)
+{
+       struct iscsi_portal_group *tpg = NULL;
+       struct iscsi_tpg_np *tpg_np;
+
+       spin_lock(&tiqn->tiqn_tpg_lock);
+       list_for_each_entry(tpg, &tiqn->tiqn_tpg_list, tpg_list) {
+
+               spin_lock(&tpg->tpg_state_lock);
+               if (tpg->tpg_state == TPG_STATE_FREE) {
+                       spin_unlock(&tpg->tpg_state_lock);
+                       continue;
+               }
+               spin_unlock(&tpg->tpg_state_lock);
+
+               spin_lock(&tpg->tpg_np_lock);
+               list_for_each_entry(tpg_np, &tpg->tpg_gnp_list, tpg_np_list) {
+                       if (tpg_np->tpg_np == np) {
+                               spin_unlock(&tpg->tpg_np_lock);
+                               spin_unlock(&tiqn->tiqn_tpg_lock);
+                               return tpg;
+                       }
+               }
+               spin_unlock(&tpg->tpg_np_lock);
+       }
+       spin_unlock(&tiqn->tiqn_tpg_lock);
+
+       return NULL;
+}
+
+/*
+ * Take the TPG access mutex interruptibly.  Returns 0 with the mutex
+ * held, or -1 if the wait was interrupted or a signal is pending
+ * (in which case the mutex is NOT held).  Pair with iscsit_put_tpg().
+ */
+int iscsit_get_tpg(
+       struct iscsi_portal_group *tpg)
+{
+       int ret;
+
+       ret = mutex_lock_interruptible(&tpg->tpg_access_lock);
+       return ((ret != 0) || signal_pending(current)) ? -1 : 0;
+}
+
+/* Release the TPG access mutex taken by iscsit_get_tpg(). */
+void iscsit_put_tpg(struct iscsi_portal_group *tpg)
+{
+       mutex_unlock(&tpg->tpg_access_lock);
+}
+
+/*
+ * Reset the login thread associated with one TPG network portal.
+ * Logs and bails out if the tpg_np has no backing struct iscsi_np.
+ */
+static void iscsit_clear_tpg_np_login_thread(
+       struct iscsi_tpg_np *tpg_np,
+       struct iscsi_portal_group *tpg)
+{
+       if (!tpg_np->tpg_np) {
+               pr_err("struct iscsi_tpg_np->tpg_np is NULL!\n");
+               return;
+       }
+
+       iscsit_reset_np_thread(tpg_np->tpg_np, tpg_np, tpg);
+}
+
+/*
+ * Reset the login threads of every network portal in the TPG.
+ * NOTE(review): tpg_np_lock is dropped around each reset (the reset
+ * may block), which assumes the list entry cannot be freed while
+ * unlocked — confirm the callers serialize portal removal.
+ */
+void iscsit_clear_tpg_np_login_threads(
+       struct iscsi_portal_group *tpg)
+{
+       struct iscsi_tpg_np *tpg_np;
+
+       spin_lock(&tpg->tpg_np_lock);
+       list_for_each_entry(tpg_np, &tpg->tpg_gnp_list, tpg_np_list) {
+               if (!tpg_np->tpg_np) {
+                       pr_err("struct iscsi_tpg_np->tpg_np is NULL!\n");
+                       continue;
+               }
+               spin_unlock(&tpg->tpg_np_lock);
+               iscsit_clear_tpg_np_login_thread(tpg_np, tpg);
+               spin_lock(&tpg->tpg_np_lock);
+       }
+       spin_unlock(&tpg->tpg_np_lock);
+}
+
+/* Debug helper: print the TPG's negotiable iSCSI parameter list. */
+void iscsit_tpg_dump_params(struct iscsi_portal_group *tpg)
+{
+       iscsi_print_params(tpg->param_list);
+}
+
+/* Initialize a TPG's configfs-visible attributes to their TA_* defaults. */
+static void iscsit_set_default_tpg_attribs(struct iscsi_portal_group *tpg)
+{
+       struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+       a->authentication = TA_AUTHENTICATION;
+       a->login_timeout = TA_LOGIN_TIMEOUT;
+       a->netif_timeout = TA_NETIF_TIMEOUT;
+       a->default_cmdsn_depth = TA_DEFAULT_CMDSN_DEPTH;
+       a->generate_node_acls = TA_GENERATE_NODE_ACLS;
+       a->cache_dynamic_acls = TA_CACHE_DYNAMIC_ACLS;
+       a->demo_mode_write_protect = TA_DEMO_MODE_WRITE_PROTECT;
+       a->prod_mode_write_protect = TA_PROD_MODE_WRITE_PROTECT;
+}
+
+/*
+ * Attach a freshly allocated TPG (must be in TPG_STATE_FREE) to @tiqn:
+ * set default attributes, build the default parameter list, move the
+ * TPG to TPG_STATE_INACTIVE and link it on the tiqn's TPG list.
+ * Returns 0 on success, -EEXIST if the TPG is not FREE, -ENOMEM on
+ * parameter-list failure.  On the error path the TPG itself is freed,
+ * so the caller must not touch it afterwards.
+ */
+int iscsit_tpg_add_portal_group(struct iscsi_tiqn *tiqn, struct iscsi_portal_group *tpg)
+{
+       if (tpg->tpg_state != TPG_STATE_FREE) {
+               pr_err("Unable to add iSCSI Target Portal Group: %d"
+                       " while not in TPG_STATE_FREE state.\n", tpg->tpgt);
+               return -EEXIST;
+       }
+       iscsit_set_default_tpg_attribs(tpg);
+
+       if (iscsi_create_default_params(&tpg->param_list) < 0)
+               goto err_out;
+
+       ISCSI_TPG_ATTRIB(tpg)->tpg = tpg;
+
+       spin_lock(&tpg->tpg_state_lock);
+       tpg->tpg_state  = TPG_STATE_INACTIVE;
+       spin_unlock(&tpg->tpg_state_lock);
+
+       spin_lock(&tiqn->tiqn_tpg_lock);
+       list_add_tail(&tpg->tpg_list, &tiqn->tiqn_tpg_list);
+       tiqn->tiqn_ntpgs++;
+       pr_debug("CORE[%s]_TPG[%hu] - Added iSCSI Target Portal Group\n",
+                       tiqn->tiqn, tpg->tpgt);
+       spin_unlock(&tiqn->tiqn_tpg_lock);
+
+       return 0;
+err_out:
+       if (tpg->param_list) {
+               iscsi_release_param_list(tpg->param_list);
+               tpg->param_list = NULL;
+       }
+       kfree(tpg);
+       return -ENOMEM;
+}
+
+/*
+ * Remove a TPG from @tiqn and free it.  Active sessions are released
+ * first (forcibly if @force); if that fails the previous state is
+ * restored and -EPERM returned.  On success the TPG's LUNs are
+ * cleared, the parameter list released, the se_tpg deregistered, and
+ * the TPG unlinked and freed.
+ * NOTE(review): old_state is read before tpg_state_lock is taken, and
+ * restored without it on the -EPERM path — confirm callers serialize
+ * state changes.
+ */
+int iscsit_tpg_del_portal_group(
+       struct iscsi_tiqn *tiqn,
+       struct iscsi_portal_group *tpg,
+       int force)
+{
+       u8 old_state = tpg->tpg_state;
+
+       spin_lock(&tpg->tpg_state_lock);
+       tpg->tpg_state = TPG_STATE_INACTIVE;
+       spin_unlock(&tpg->tpg_state_lock);
+
+       if (iscsit_release_sessions_for_tpg(tpg, force) < 0) {
+               pr_err("Unable to delete iSCSI Target Portal Group:"
+                       " %hu while active sessions exist, and force=0\n",
+                       tpg->tpgt);
+               tpg->tpg_state = old_state;
+               return -EPERM;
+       }
+
+       core_tpg_clear_object_luns(&tpg->tpg_se_tpg);
+
+       if (tpg->param_list) {
+               iscsi_release_param_list(tpg->param_list);
+               tpg->param_list = NULL;
+       }
+
+       core_tpg_deregister(&tpg->tpg_se_tpg);
+
+       spin_lock(&tpg->tpg_state_lock);
+       tpg->tpg_state = TPG_STATE_FREE;
+       spin_unlock(&tpg->tpg_state_lock);
+
+       spin_lock(&tiqn->tiqn_tpg_lock);
+       tiqn->tiqn_ntpgs--;
+       list_del(&tpg->tpg_list);
+       spin_unlock(&tiqn->tiqn_tpg_lock);
+
+       pr_debug("CORE[%s]_TPG[%hu] - Deleted iSCSI Target Portal Group\n",
+                       tiqn->tiqn, tpg->tpgt);
+
+       kfree(tpg);
+       return 0;
+}
+
+/*
+ * Transition a TPG to TPG_STATE_ACTIVE, enforcing CHAP authentication
+ * unless the tpg_attrib disables it.  Returns 0 on success, -EINVAL if
+ * already active, -ENOMEM on parameter failures.
+ * NOTE(review): iscsi_update_param_value() and iscsit_ta_authentication()
+ * are called while holding the tpg_state_lock spinlock — confirm neither
+ * can sleep/allocate with GFP_KERNEL in this context.
+ */
+int iscsit_tpg_enable_portal_group(struct iscsi_portal_group *tpg)
+{
+       struct iscsi_param *param;
+       struct iscsi_tiqn *tiqn = tpg->tpg_tiqn;
+
+       spin_lock(&tpg->tpg_state_lock);
+       if (tpg->tpg_state == TPG_STATE_ACTIVE) {
+               pr_err("iSCSI target portal group: %hu is already"
+                       " active, ignoring request.\n", tpg->tpgt);
+               spin_unlock(&tpg->tpg_state_lock);
+               return -EINVAL;
+       }
+       /*
+        * Make sure that AuthMethod does not contain None as an option
+        * unless explicitly disabled.  Set the default to CHAP if authentication
+        * is enforced (as per default), and remove the NONE option.
+        */
+       param = iscsi_find_param_from_key(AUTHMETHOD, tpg->param_list);
+       if (!param) {
+               spin_unlock(&tpg->tpg_state_lock);
+               return -ENOMEM;
+       }
+
+       if (ISCSI_TPG_ATTRIB(tpg)->authentication) {
+               if (!strcmp(param->value, NONE))
+                       if (iscsi_update_param_value(param, CHAP) < 0) {
+                               spin_unlock(&tpg->tpg_state_lock);
+                               return -ENOMEM;
+                       }
+               if (iscsit_ta_authentication(tpg, 1) < 0) {
+                       spin_unlock(&tpg->tpg_state_lock);
+                       return -ENOMEM;
+               }
+       }
+
+       tpg->tpg_state = TPG_STATE_ACTIVE;
+       spin_unlock(&tpg->tpg_state_lock);
+
+       spin_lock(&tiqn->tiqn_tpg_lock);
+       tiqn->tiqn_active_tpgs++;
+       pr_debug("iSCSI_TPG[%hu] - Enabled iSCSI Target Portal Group\n",
+                       tpg->tpgt);
+       spin_unlock(&tiqn->tiqn_tpg_lock);
+
+       return 0;
+}
+
+/*
+ * Transition a TPG to TPG_STATE_INACTIVE: stop its portal login
+ * threads and release its sessions (forcibly if @force).  Returns 0 on
+ * success, -EINVAL if already inactive, -EPERM if sessions remain and
+ * force=0 (state is restored in that case).  The discovery TPG and
+ * tiqn-less TPGs skip the active-TPG accounting.
+ */
+int iscsit_tpg_disable_portal_group(struct iscsi_portal_group *tpg, int force)
+{
+       struct iscsi_tiqn *tiqn;
+       u8 old_state = tpg->tpg_state;
+
+       spin_lock(&tpg->tpg_state_lock);
+       if (tpg->tpg_state == TPG_STATE_INACTIVE) {
+               pr_err("iSCSI Target Portal Group: %hu is already"
+                       " inactive, ignoring request.\n", tpg->tpgt);
+               spin_unlock(&tpg->tpg_state_lock);
+               return -EINVAL;
+       }
+       tpg->tpg_state = TPG_STATE_INACTIVE;
+       spin_unlock(&tpg->tpg_state_lock);
+
+       iscsit_clear_tpg_np_login_threads(tpg);
+
+       if (iscsit_release_sessions_for_tpg(tpg, force) < 0) {
+               spin_lock(&tpg->tpg_state_lock);
+               tpg->tpg_state = old_state;
+               spin_unlock(&tpg->tpg_state_lock);
+               pr_err("Unable to disable iSCSI Target Portal Group:"
+                       " %hu while active sessions exist, and force=0\n",
+                       tpg->tpgt);
+               return -EPERM;
+       }
+
+       tiqn = tpg->tpg_tiqn;
+       if (!tiqn || (tpg == iscsit_global->discovery_tpg))
+               return 0;
+
+       spin_lock(&tiqn->tiqn_tpg_lock);
+       tiqn->tiqn_active_tpgs--;
+       pr_debug("iSCSI_TPG[%hu] - Disabled iSCSI Target Portal Group\n",
+                       tpg->tpgt);
+       spin_unlock(&tiqn->tiqn_tpg_lock);
+
+       return 0;
+}
+
+/*
+ * Return the iSCSI node attributes embedded in the session's ACL,
+ * recovered via container_of() from the generic se_node_acl.
+ */
+struct iscsi_node_attrib *iscsit_tpg_get_node_attrib(
+       struct iscsi_session *sess)
+{
+       struct se_session *se_sess = sess->se_sess;
+       struct se_node_acl *se_nacl = se_sess->se_node_acl;
+       struct iscsi_node_acl *acl = container_of(se_nacl, struct iscsi_node_acl,
+                                       se_node_acl);
+
+       return &acl->node_attrib;
+}
+
+/*
+ * Find a child portal of @tpg_np whose backing network portal uses the
+ * given transport (TCP/SCTP).  Returns the child, or NULL if none.
+ */
+struct iscsi_tpg_np *iscsit_tpg_locate_child_np(
+       struct iscsi_tpg_np *tpg_np,
+       int network_transport)
+{
+       struct iscsi_tpg_np *tpg_np_child, *tpg_np_child_tmp;
+
+       spin_lock(&tpg_np->tpg_np_parent_lock);
+       list_for_each_entry_safe(tpg_np_child, tpg_np_child_tmp,
+                       &tpg_np->tpg_np_parent_list, tpg_np_child_list) {
+               if (tpg_np_child->tpg_np->np_network_transport ==
+                               network_transport) {
+                       spin_unlock(&tpg_np->tpg_np_parent_lock);
+                       return tpg_np_child;
+               }
+       }
+       spin_unlock(&tpg_np->tpg_np_parent_lock);
+
+       return NULL;
+}
+
+/*
+ * Create (or reuse, inside iscsit_add_np()) a network portal for
+ * @sockaddr and link it to @tpg.  If @tpg_np_parent is non-NULL the new
+ * tpg_np is also registered as its child (e.g. SCTP portal under a TCP
+ * parent).  Returns the new struct iscsi_tpg_np, or an ERR_PTR on
+ * allocation / iscsit_add_np() failure.
+ */
+struct iscsi_tpg_np *iscsit_tpg_add_network_portal(
+       struct iscsi_portal_group *tpg,
+       struct __kernel_sockaddr_storage *sockaddr,
+       char *ip_str,
+       struct iscsi_tpg_np *tpg_np_parent,
+       int network_transport)
+{
+       struct iscsi_np *np;
+       struct iscsi_tpg_np *tpg_np;
+
+       tpg_np = kzalloc(sizeof(struct iscsi_tpg_np), GFP_KERNEL);
+       if (!tpg_np) {
+               pr_err("Unable to allocate memory for"
+                               " struct iscsi_tpg_np.\n");
+               return ERR_PTR(-ENOMEM);
+       }
+
+       np = iscsit_add_np(sockaddr, ip_str, network_transport);
+       if (IS_ERR(np)) {
+               kfree(tpg_np);
+               return ERR_CAST(np);
+       }
+
+       INIT_LIST_HEAD(&tpg_np->tpg_np_list);
+       INIT_LIST_HEAD(&tpg_np->tpg_np_child_list);
+       INIT_LIST_HEAD(&tpg_np->tpg_np_parent_list);
+       spin_lock_init(&tpg_np->tpg_np_parent_lock);
+       tpg_np->tpg_np          = np;
+       tpg_np->tpg             = tpg;
+
+       spin_lock(&tpg->tpg_np_lock);
+       list_add_tail(&tpg_np->tpg_np_list, &tpg->tpg_gnp_list);
+       tpg->num_tpg_nps++;
+       if (tpg->tpg_tiqn)
+               tpg->tpg_tiqn->tiqn_num_tpg_nps++;
+       spin_unlock(&tpg->tpg_np_lock);
+
+       if (tpg_np_parent) {
+               tpg_np->tpg_np_parent = tpg_np_parent;
+               spin_lock(&tpg_np_parent->tpg_np_parent_lock);
+               list_add_tail(&tpg_np->tpg_np_child_list,
+                       &tpg_np_parent->tpg_np_parent_list);
+               spin_unlock(&tpg_np_parent->tpg_np_parent_lock);
+       }
+
+       pr_debug("CORE[%s] - Added Network Portal: %s:%hu,%hu on %s\n",
+               tpg->tpg_tiqn->tiqn, np->np_ip, np->np_port, tpg->tpgt,
+               (np->np_network_transport == ISCSI_TCP) ? "TCP" : "SCTP");
+
+       return tpg_np;
+}
+
+/*
+ * Final teardown of a single tpg_np: stop its login thread, free the
+ * tpg_np itself, and drop the TPG's reference on the underlying np via
+ * iscsit_del_np().
+ */
+static int iscsit_tpg_release_np(
+       struct iscsi_tpg_np *tpg_np,
+       struct iscsi_portal_group *tpg,
+       struct iscsi_np *np)
+{
+       iscsit_clear_tpg_np_login_thread(tpg_np, tpg);
+
+       pr_debug("CORE[%s] - Removed Network Portal: %s:%hu,%hu on %s\n",
+               tpg->tpg_tiqn->tiqn, np->np_ip, np->np_port, tpg->tpgt,
+               (np->np_network_transport == ISCSI_TCP) ? "TCP" : "SCTP");
+
+       tpg_np->tpg_np = NULL;
+       tpg_np->tpg = NULL;
+       kfree(tpg_np);
+       /*
+        * iscsit_del_np() will shutdown struct iscsi_np when last TPG reference is released.
+        */
+       return iscsit_del_np(np);
+}
+
+/*
+ * Remove a network portal from a portal group.
+ *
+ * A parent portal first recursively deletes all of its children; a
+ * child portal only unlinks itself from its parent's child list.  The
+ * portal is then removed from the TPG's list and released via
+ * iscsit_tpg_release_np().  Returns 0 or a negative error code.
+ */
+int iscsit_tpg_del_network_portal(
+       struct iscsi_portal_group *tpg,
+       struct iscsi_tpg_np *tpg_np)
+{
+       struct iscsi_np *np;
+       struct iscsi_tpg_np *tpg_np_child, *tpg_np_child_tmp;
+       int ret = 0;
+
+       np = tpg_np->tpg_np;
+       if (!np) {
+               pr_err("Unable to locate struct iscsi_np from"
+                               " struct iscsi_tpg_np\n");
+               return -EINVAL;
+       }
+
+       if (!tpg_np->tpg_np_parent) {
+               /*
+                * We are the parent tpg network portal.  Release all of the
+                * child tpg_np's (eg: the non ISCSI_TCP ones) on our parent
+                * list first.
+                */
+               list_for_each_entry_safe(tpg_np_child, tpg_np_child_tmp,
+                               &tpg_np->tpg_np_parent_list,
+                               tpg_np_child_list) {
+                       /* Recursive call; children have no children. */
+                       ret = iscsit_tpg_del_network_portal(tpg, tpg_np_child);
+                       if (ret < 0)
+                               pr_err("iscsit_tpg_del_network_portal()"
+                                       " failed: %d\n", ret);
+               }
+       } else {
+               /*
+                * We are not the parent ISCSI_TCP tpg network portal.  Release
+                * our own network portals from the child list.
+                */
+               spin_lock(&tpg_np->tpg_np_parent->tpg_np_parent_lock);
+               list_del(&tpg_np->tpg_np_child_list);
+               spin_unlock(&tpg_np->tpg_np_parent->tpg_np_parent_lock);
+       }
+
+       spin_lock(&tpg->tpg_np_lock);
+       list_del(&tpg_np->tpg_np_list);
+       tpg->num_tpg_nps--;
+       if (tpg->tpg_tiqn)
+               tpg->tpg_tiqn->tiqn_num_tpg_nps--;
+       spin_unlock(&tpg->tpg_np_lock);
+
+       return iscsit_tpg_release_np(tpg_np, tpg, np);
+}
+
+/*
+ * Thin wrapper: forward the queue-depth change for @initiatorname to
+ * the generic target core, operating on this TPG's se_tpg.
+ */
+int iscsit_tpg_set_initiator_node_queue_depth(
+       struct iscsi_portal_group *tpg,
+       unsigned char *initiatorname,
+       u32 queue_depth,
+       int force)
+{
+       return core_tpg_set_initiator_node_queue_depth(&tpg->tpg_se_tpg,
+               initiatorname, queue_depth, force);
+}
+
+/*
+ * Enable (1) or disable (0) authentication enforcement for a TPG by
+ * editing the AuthMethod parameter's comma-separated value list:
+ * enabling strips "None" from the list, disabling appends it.
+ *
+ * Returns 0 on success, -1 for an illegal @authentication value, or
+ * -EINVAL when the parameter lookup/update fails.
+ */
+int iscsit_ta_authentication(struct iscsi_portal_group *tpg, u32 authentication)
+{
+       unsigned char buf1[256], buf2[256], *none = NULL;
+       int len;
+       struct iscsi_param *param;
+       struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+       if ((authentication != 1) && (authentication != 0)) {
+               pr_err("Illegal value for authentication parameter:"
+                       " %u, ignoring request.\n", authentication);
+               return -1;
+       }
+
+       memset(buf1, 0, sizeof(buf1));
+       memset(buf2, 0, sizeof(buf2));
+
+       param = iscsi_find_param_from_key(AUTHMETHOD, tpg->param_list);
+       if (!param)
+               return -EINVAL;
+
+       if (authentication) {
+               /* Remove "None" from the AuthMethod value list, if present. */
+               snprintf(buf1, sizeof(buf1), "%s", param->value);
+               none = strstr(buf1, NONE);
+               if (!none)
+                       goto out;
+               /*
+                * NOTE(review): the +4/+5 offsets assume NONE is the
+                * 4-character string "None" — confirm against the
+                * parameter definitions.
+                */
+               if (!strncmp(none + 4, ",", 1)) {
+                       if (!strcmp(buf1, none))
+                               /* "None" is the first entry: drop it and the comma. */
+                               sprintf(buf2, "%s", none+5);
+                       else {
+                               /* "None" is in the middle: splice it out. */
+                               none--;
+                               *none = '\0';
+                               len = sprintf(buf2, "%s", buf1);
+                               none += 5;
+                               sprintf(buf2 + len, "%s", none);
+                       }
+               } else {
+                       /* "None" is the last entry: truncate before it. */
+                       none--;
+                       *none = '\0';
+                       sprintf(buf2, "%s", buf1);
+               }
+               if (iscsi_update_param_value(param, buf2) < 0)
+                       return -EINVAL;
+       } else {
+               /* Append ",None" unless it is already present. */
+               snprintf(buf1, sizeof(buf1), "%s", param->value);
+               none = strstr(buf1, NONE);
+               if ((none))
+                       goto out;
+               strncat(buf1, ",", strlen(","));
+               strncat(buf1, NONE, strlen(NONE));
+               if (iscsi_update_param_value(param, buf1) < 0)
+                       return -EINVAL;
+       }
+
+out:
+       a->authentication = authentication;
+       pr_debug("%s iSCSI Authentication Methods for TPG: %hu.\n",
+               a->authentication ? "Enforcing" : "Disabling", tpg->tpgt);
+
+       return 0;
+}
+
+/*
+ * Set the login timeout attribute for a portal group.
+ *
+ * Returns 0 on success, or -EINVAL when @login_timeout falls outside
+ * the [TA_LOGIN_TIMEOUT_MIN, TA_LOGIN_TIMEOUT_MAX] range.
+ *
+ * Fixed copy/paste bug: the under-minimum error and the success debug
+ * message previously said "Logout Timeout" although this function sets
+ * the *login* timeout.
+ */
+int iscsit_ta_login_timeout(
+       struct iscsi_portal_group *tpg,
+       u32 login_timeout)
+{
+       struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+       if (login_timeout > TA_LOGIN_TIMEOUT_MAX) {
+               pr_err("Requested Login Timeout %u larger than maximum"
+                       " %u\n", login_timeout, TA_LOGIN_TIMEOUT_MAX);
+               return -EINVAL;
+       } else if (login_timeout < TA_LOGIN_TIMEOUT_MIN) {
+               pr_err("Requested Login Timeout %u smaller than"
+                       " minimum %u\n", login_timeout, TA_LOGIN_TIMEOUT_MIN);
+               return -EINVAL;
+       }
+
+       a->login_timeout = login_timeout;
+       pr_debug("Set Login Timeout to %u for Target Portal Group"
+               " %hu\n", a->login_timeout, tpg->tpgt);
+
+       return 0;
+}
+
+/*
+ * Set the network interface timeout attribute for a portal group.
+ * Returns 0 on success, or -EINVAL when @netif_timeout is outside
+ * [TA_NETIF_TIMEOUT_MIN, TA_NETIF_TIMEOUT_MAX].
+ */
+int iscsit_ta_netif_timeout(
+       struct iscsi_portal_group *tpg,
+       u32 netif_timeout)
+{
+       struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+       if (netif_timeout > TA_NETIF_TIMEOUT_MAX) {
+               pr_err("Requested Network Interface Timeout %u larger"
+                       " than maximum %u\n", netif_timeout,
+                               TA_NETIF_TIMEOUT_MAX);
+               return -EINVAL;
+       } else if (netif_timeout < TA_NETIF_TIMEOUT_MIN) {
+               pr_err("Requested Network Interface Timeout %u smaller"
+                       " than minimum %u\n", netif_timeout,
+                               TA_NETIF_TIMEOUT_MIN);
+               return -EINVAL;
+       }
+
+       a->netif_timeout = netif_timeout;
+       pr_debug("Set Network Interface Timeout to %u for"
+               " Target Portal Group %hu\n", a->netif_timeout, tpg->tpgt);
+
+       return 0;
+}
+
+/*
+ * Enable (1) or disable (0) automatic generation of initiator node
+ * ACLs for this portal group.  Returns 0 or -EINVAL for other values.
+ *
+ * Fixed: @flag is u32, so print it with %u instead of %d.
+ */
+int iscsit_ta_generate_node_acls(
+       struct iscsi_portal_group *tpg,
+       u32 flag)
+{
+       struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+       if ((flag != 0) && (flag != 1)) {
+               pr_err("Illegal value %u\n", flag);
+               return -EINVAL;
+       }
+
+       a->generate_node_acls = flag;
+       pr_debug("iSCSI_TPG[%hu] - Generate Initiator Portal Group ACLs: %s\n",
+               tpg->tpgt, (a->generate_node_acls) ? "Enabled" : "Disabled");
+
+       return 0;
+}
+
+/*
+ * Set the default CmdSN (TCQ) queue depth for this portal group.
+ * Returns 0 on success, or -EINVAL when @tcq_depth is outside
+ * [TA_DEFAULT_CMDSN_DEPTH_MIN, TA_DEFAULT_CMDSN_DEPTH_MAX].
+ */
+int iscsit_ta_default_cmdsn_depth(
+       struct iscsi_portal_group *tpg,
+       u32 tcq_depth)
+{
+       struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+       if (tcq_depth > TA_DEFAULT_CMDSN_DEPTH_MAX) {
+               pr_err("Requested Default Queue Depth: %u larger"
+                       " than maximum %u\n", tcq_depth,
+                               TA_DEFAULT_CMDSN_DEPTH_MAX);
+               return -EINVAL;
+       } else if (tcq_depth < TA_DEFAULT_CMDSN_DEPTH_MIN) {
+               pr_err("Requested Default Queue Depth: %u smaller"
+                       " than minimum %u\n", tcq_depth,
+                               TA_DEFAULT_CMDSN_DEPTH_MIN);
+               return -EINVAL;
+       }
+
+       a->default_cmdsn_depth = tcq_depth;
+       pr_debug("iSCSI_TPG[%hu] - Set Default CmdSN TCQ Depth to %u\n",
+               tpg->tpgt, a->default_cmdsn_depth);
+
+       return 0;
+}
+
+/*
+ * Enable (1) or disable (0) caching of dynamically generated node
+ * ACLs for this portal group.  Returns 0 or -EINVAL for other values.
+ *
+ * Fixed: @flag is u32, so print it with %u instead of %d.
+ */
+int iscsit_ta_cache_dynamic_acls(
+       struct iscsi_portal_group *tpg,
+       u32 flag)
+{
+       struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+       if ((flag != 0) && (flag != 1)) {
+               pr_err("Illegal value %u\n", flag);
+               return -EINVAL;
+       }
+
+       a->cache_dynamic_acls = flag;
+       pr_debug("iSCSI_TPG[%hu] - Cache Dynamic Initiator Portal Group"
+               " ACLs %s\n", tpg->tpgt, (a->cache_dynamic_acls) ?
+               "Enabled" : "Disabled");
+
+       return 0;
+}
+
+/*
+ * Set (1) or clear (0) the demo-mode write protect bit for this
+ * portal group.  Returns 0 or -EINVAL for other values.
+ *
+ * Fixed: @flag is u32, so print it with %u instead of %d.
+ */
+int iscsit_ta_demo_mode_write_protect(
+       struct iscsi_portal_group *tpg,
+       u32 flag)
+{
+       struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+       if ((flag != 0) && (flag != 1)) {
+               pr_err("Illegal value %u\n", flag);
+               return -EINVAL;
+       }
+
+       a->demo_mode_write_protect = flag;
+       pr_debug("iSCSI_TPG[%hu] - Demo Mode Write Protect bit: %s\n",
+               tpg->tpgt, (a->demo_mode_write_protect) ? "ON" : "OFF");
+
+       return 0;
+}
+
+/*
+ * Set (1) or clear (0) the production-mode write protect bit for this
+ * portal group.  Returns 0 or -EINVAL for other values.
+ *
+ * Fixed: @flag is u32, so print it with %u instead of %d.
+ */
+int iscsit_ta_prod_mode_write_protect(
+       struct iscsi_portal_group *tpg,
+       u32 flag)
+{
+       struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+       if ((flag != 0) && (flag != 1)) {
+               pr_err("Illegal value %u\n", flag);
+               return -EINVAL;
+       }
+
+       a->prod_mode_write_protect = flag;
+       pr_debug("iSCSI_TPG[%hu] - Production Mode Write Protect bit:"
+               " %s\n", tpg->tpgt, (a->prod_mode_write_protect) ?
+               "ON" : "OFF");
+
+       return 0;
+}
diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h
new file mode 100644 (file)
index 0000000..dda48c1
--- /dev/null
@@ -0,0 +1,41 @@
+#ifndef ISCSI_TARGET_TPG_H
+#define ISCSI_TARGET_TPG_H
+
+/*
+ * Prototypes for iSCSI Target Portal Group (TPG) lifecycle, network
+ * portal management, and the "ta" target-attribute setters implemented
+ * in iscsi_target_tpg.c.
+ */
+extern struct iscsi_portal_group *iscsit_alloc_portal_group(struct iscsi_tiqn *, u16);
+extern int iscsit_load_discovery_tpg(void);
+extern void iscsit_release_discovery_tpg(void);
+extern struct iscsi_portal_group *iscsit_get_tpg_from_np(struct iscsi_tiqn *,
+                       struct iscsi_np *);
+extern int iscsit_get_tpg(struct iscsi_portal_group *);
+extern void iscsit_put_tpg(struct iscsi_portal_group *);
+extern void iscsit_clear_tpg_np_login_threads(struct iscsi_portal_group *);
+extern void iscsit_tpg_dump_params(struct iscsi_portal_group *);
+extern int iscsit_tpg_add_portal_group(struct iscsi_tiqn *, struct iscsi_portal_group *);
+extern int iscsit_tpg_del_portal_group(struct iscsi_tiqn *, struct iscsi_portal_group *,
+                       int);
+extern int iscsit_tpg_enable_portal_group(struct iscsi_portal_group *);
+extern int iscsit_tpg_disable_portal_group(struct iscsi_portal_group *, int);
+extern struct iscsi_node_acl *iscsit_tpg_add_initiator_node_acl(
+                       struct iscsi_portal_group *, const char *, u32);
+extern void iscsit_tpg_del_initiator_node_acl(struct iscsi_portal_group *,
+                       struct se_node_acl *);
+extern struct iscsi_node_attrib *iscsit_tpg_get_node_attrib(struct iscsi_session *);
+extern void iscsit_tpg_del_external_nps(struct iscsi_tpg_np *);
+extern struct iscsi_tpg_np *iscsit_tpg_locate_child_np(struct iscsi_tpg_np *, int);
+extern struct iscsi_tpg_np *iscsit_tpg_add_network_portal(struct iscsi_portal_group *,
+                       struct __kernel_sockaddr_storage *, char *, struct iscsi_tpg_np *,
+                       int);
+extern int iscsit_tpg_del_network_portal(struct iscsi_portal_group *,
+                       struct iscsi_tpg_np *);
+extern int iscsit_tpg_set_initiator_node_queue_depth(struct iscsi_portal_group *,
+                       unsigned char *, u32, int);
+/* Target attribute ("ta") setters exposed via configfs. */
+extern int iscsit_ta_authentication(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_login_timeout(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_netif_timeout(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_generate_node_acls(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_default_cmdsn_depth(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_cache_dynamic_acls(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_demo_mode_write_protect(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_prod_mode_write_protect(struct iscsi_portal_group *, u32);
+
+#endif /* ISCSI_TARGET_TPG_H */
diff --git a/drivers/target/iscsi/iscsi_target_tq.c b/drivers/target/iscsi/iscsi_target_tq.c
new file mode 100644 (file)
index 0000000..0baac5b
--- /dev/null
@@ -0,0 +1,551 @@
+/*******************************************************************************
+ * This file contains the iSCSI Login Thread and Thread Queue functions.
+ *
+ * (c) Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/bitmap.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_tq.h"
+#include "iscsi_target.h"
+
+/* Global pools of thread sets: active (bound to a conn) and inactive. */
+static LIST_HEAD(active_ts_list);
+static LIST_HEAD(inactive_ts_list);
+/* Protect the two lists above and iscsit_global->ts_bitmap. */
+static DEFINE_SPINLOCK(active_ts_lock);
+static DEFINE_SPINLOCK(inactive_ts_lock);
+static DEFINE_SPINLOCK(ts_bitmap_lock);
+
+/* Append @ts to the global active list and bump the active count. */
+static void iscsi_add_ts_to_active_list(struct iscsi_thread_set *ts)
+{
+       spin_lock(&active_ts_lock);
+       list_add_tail(&ts->ts_list, &active_ts_list);
+       iscsit_global->active_ts++;
+       spin_unlock(&active_ts_lock);
+}
+
+/*
+ * Append @ts to the global inactive list and bump the inactive count.
+ *
+ * Fixed: dropped the redundant 'extern' storage-class specifier from
+ * the definition (it belongs on the declaration in iscsi_target_tq.h);
+ * external linkage is unchanged.
+ */
+void iscsi_add_ts_to_inactive_list(struct iscsi_thread_set *ts)
+{
+       spin_lock(&inactive_ts_lock);
+       list_add_tail(&ts->ts_list, &inactive_ts_list);
+       iscsit_global->inactive_ts++;
+       spin_unlock(&inactive_ts_lock);
+}
+
+/* Unlink @ts from the global active list and drop the active count. */
+static void iscsi_del_ts_from_active_list(struct iscsi_thread_set *ts)
+{
+       spin_lock(&active_ts_lock);
+       list_del(&ts->ts_list);
+       iscsit_global->active_ts--;
+       spin_unlock(&active_ts_lock);
+}
+
+/*
+ * Pop the first thread set off the global inactive list, or return
+ * NULL when the list is empty.  Runs under inactive_ts_lock.
+ *
+ * Improved: use list_first_entry() to take the head directly instead
+ * of a one-iteration list_for_each_entry() walk with an immediate
+ * break, which obscured the intent.
+ */
+static struct iscsi_thread_set *iscsi_get_ts_from_inactive_list(void)
+{
+       struct iscsi_thread_set *ts;
+
+       spin_lock(&inactive_ts_lock);
+       if (list_empty(&inactive_ts_list)) {
+               spin_unlock(&inactive_ts_lock);
+               return NULL;
+       }
+
+       ts = list_first_entry(&inactive_ts_list, struct iscsi_thread_set,
+                       ts_list);
+
+       list_del(&ts->ts_list);
+       iscsit_global->inactive_ts--;
+       spin_unlock(&inactive_ts_lock);
+
+       return ts;
+}
+
+/*
+ * Allocate and spawn up to @thread_pair_count RX/TX kthread pairs.
+ *
+ * Each thread set claims a thread_id region from
+ * iscsit_global->ts_bitmap and spawns one TX and one RX kthread before
+ * being parked on the inactive list.  Returns the number of pairs
+ * actually created, which may be fewer than requested on failure.
+ *
+ * Fixed: dropped the redundant 'extern' on the definition; on
+ * kthread_run() failure, release the claimed bitmap region and free
+ * the thread set instead of leaking both; loop counter is u32 to
+ * match @thread_pair_count; comment typo "regision" corrected.
+ */
+int iscsi_allocate_thread_sets(u32 thread_pair_count)
+{
+       int allocated_thread_pair_count = 0, thread_id;
+       u32 i;
+       struct iscsi_thread_set *ts = NULL;
+
+       for (i = 0; i < thread_pair_count; i++) {
+               ts = kzalloc(sizeof(struct iscsi_thread_set), GFP_KERNEL);
+               if (!ts) {
+                       pr_err("Unable to allocate memory for"
+                                       " thread set.\n");
+                       return allocated_thread_pair_count;
+               }
+               /*
+                * Locate the next available region in the thread_set_bitmap
+                */
+               spin_lock(&ts_bitmap_lock);
+               thread_id = bitmap_find_free_region(iscsit_global->ts_bitmap,
+                               iscsit_global->ts_bitmap_count, get_order(1));
+               spin_unlock(&ts_bitmap_lock);
+               if (thread_id < 0) {
+                       pr_err("bitmap_find_free_region() failed for"
+                               " thread_set_bitmap\n");
+                       kfree(ts);
+                       return allocated_thread_pair_count;
+               }
+
+               ts->thread_id = thread_id;
+               ts->status = ISCSI_THREAD_SET_FREE;
+               INIT_LIST_HEAD(&ts->ts_list);
+               spin_lock_init(&ts->ts_state_lock);
+               init_completion(&ts->rx_post_start_comp);
+               init_completion(&ts->tx_post_start_comp);
+               init_completion(&ts->rx_restart_comp);
+               init_completion(&ts->tx_restart_comp);
+               init_completion(&ts->rx_start_comp);
+               init_completion(&ts->tx_start_comp);
+
+               ts->create_threads = 1;
+               ts->tx_thread = kthread_run(iscsi_target_tx_thread, ts, "%s",
+                                       ISCSI_TX_THREAD_NAME);
+               if (IS_ERR(ts->tx_thread)) {
+                       dump_stack();
+                       pr_err("Unable to start iscsi_target_tx_thread\n");
+                       /* Don't leak the bitmap region or the thread set. */
+                       spin_lock(&ts_bitmap_lock);
+                       bitmap_release_region(iscsit_global->ts_bitmap,
+                                       ts->thread_id, get_order(1));
+                       spin_unlock(&ts_bitmap_lock);
+                       kfree(ts);
+                       break;
+               }
+
+               ts->rx_thread = kthread_run(iscsi_target_rx_thread, ts, "%s",
+                                       ISCSI_RX_THREAD_NAME);
+               if (IS_ERR(ts->rx_thread)) {
+                       kthread_stop(ts->tx_thread);
+                       pr_err("Unable to start iscsi_target_rx_thread\n");
+                       /* Don't leak the bitmap region or the thread set. */
+                       spin_lock(&ts_bitmap_lock);
+                       bitmap_release_region(iscsit_global->ts_bitmap,
+                                       ts->thread_id, get_order(1));
+                       spin_unlock(&ts_bitmap_lock);
+                       kfree(ts);
+                       break;
+               }
+               ts->create_threads = 0;
+
+               iscsi_add_ts_to_inactive_list(ts);
+               allocated_thread_pair_count++;
+       }
+
+       pr_debug("Spawned %d thread set(s) (%d total threads).\n",
+               allocated_thread_pair_count, allocated_thread_pair_count * 2);
+       return allocated_thread_pair_count;
+}
+
+/*
+ * Tear down every thread set on the inactive list: mark it DIE, signal
+ * and stop both kthreads, release its thread_id region from the
+ * bitmap, and free it.
+ *
+ * Fixed: dropped the redundant 'extern' storage-class specifier from
+ * the definition; external linkage is unchanged.
+ */
+void iscsi_deallocate_thread_sets(void)
+{
+       u32 released_count = 0;
+       struct iscsi_thread_set *ts = NULL;
+
+       while ((ts = iscsi_get_ts_from_inactive_list())) {
+
+               spin_lock_bh(&ts->ts_state_lock);
+               ts->status = ISCSI_THREAD_SET_DIE;
+               spin_unlock_bh(&ts->ts_state_lock);
+
+               if (ts->rx_thread) {
+                       send_sig(SIGINT, ts->rx_thread, 1);
+                       kthread_stop(ts->rx_thread);
+               }
+               if (ts->tx_thread) {
+                       send_sig(SIGINT, ts->tx_thread, 1);
+                       kthread_stop(ts->tx_thread);
+               }
+               /*
+                * Release this thread_id in the thread_set_bitmap
+                */
+               spin_lock(&ts_bitmap_lock);
+               bitmap_release_region(iscsit_global->ts_bitmap,
+                               ts->thread_id, get_order(1));
+               spin_unlock(&ts_bitmap_lock);
+
+               released_count++;
+               kfree(ts);
+       }
+
+       if (released_count)
+               pr_debug("Stopped %d thread set(s) (%d total threads)."
+                       "\n", released_count, released_count * 2);
+}
+
+/*
+ * Trim the inactive pool back toward TARGET_THREAD_SET_COUNT: while
+ * more than the baseline number of sets are parked, pop one, stop its
+ * kthreads, release its bitmap region, and free it.
+ */
+static void iscsi_deallocate_extra_thread_sets(void)
+{
+       u32 orig_count, released_count = 0;
+       struct iscsi_thread_set *ts = NULL;
+
+       orig_count = TARGET_THREAD_SET_COUNT;
+
+       while ((iscsit_global->inactive_ts + 1) > orig_count) {
+               ts = iscsi_get_ts_from_inactive_list();
+               if (!ts)
+                       break;
+
+               spin_lock_bh(&ts->ts_state_lock);
+               ts->status = ISCSI_THREAD_SET_DIE;
+               spin_unlock_bh(&ts->ts_state_lock);
+
+               if (ts->rx_thread) {
+                       send_sig(SIGINT, ts->rx_thread, 1);
+                       kthread_stop(ts->rx_thread);
+               }
+               if (ts->tx_thread) {
+                       send_sig(SIGINT, ts->tx_thread, 1);
+                       kthread_stop(ts->tx_thread);
+               }
+               /*
+                * Release this thread_id in the thread_set_bitmap
+                */
+               spin_lock(&ts_bitmap_lock);
+               bitmap_release_region(iscsit_global->ts_bitmap,
+                               ts->thread_id, get_order(1));
+               spin_unlock(&ts_bitmap_lock);
+
+               released_count++;
+               kfree(ts);
+       }
+
+       if (released_count) {
+               pr_debug("Stopped %d thread set(s) (%d total threads)."
+                       "\n", released_count, released_count * 2);
+       }
+}
+
+/*
+ * Bind @conn to thread set @ts, move the set onto the active list,
+ * and kick its RX thread.  Blocks until the RX/TX pair has completed
+ * startup (rx_post_start_comp is completed from the TX pre-handler).
+ */
+void iscsi_activate_thread_set(struct iscsi_conn *conn, struct iscsi_thread_set *ts)
+{
+       iscsi_add_ts_to_active_list(ts);
+
+       spin_lock_bh(&ts->ts_state_lock);
+       conn->thread_set = ts;
+       ts->conn = conn;
+       spin_unlock_bh(&ts->ts_state_lock);
+       /*
+        * Start up the RX thread and wait on rx_post_start_comp.  The RX
+        * Thread will then do the same for the TX Thread in
+        * iscsi_rx_thread_pre_handler().
+        */
+       complete(&ts->rx_start_comp);
+       wait_for_completion(&ts->rx_post_start_comp);
+}
+
+/*
+ * Obtain a free thread set, never returning NULL: retry the inactive
+ * list, sleeping ~1s between attempts (via a throwaway completion
+ * timeout), and allocate a fresh set on the third attempt.  The
+ * returned set is reset for a new two-thread activation cycle.
+ */
+struct iscsi_thread_set *iscsi_get_thread_set(void)
+{
+       int allocate_ts = 0;
+       struct completion comp;
+       struct iscsi_thread_set *ts = NULL;
+       /*
+        * If no inactive thread set is available on the first call to
+        * iscsi_get_ts_from_inactive_list(), sleep for a second and
+        * try again.  If still none are available after two attempts,
+        * allocate a set ourselves.
+        */
+get_set:
+       ts = iscsi_get_ts_from_inactive_list();
+       if (!ts) {
+               if (allocate_ts == 2)
+                       iscsi_allocate_thread_sets(1);
+
+               /* Never-completed completion used purely as a 1s sleep. */
+               init_completion(&comp);
+               wait_for_completion_timeout(&comp, 1 * HZ);
+
+               allocate_ts++;
+               goto get_set;
+       }
+
+       ts->delay_inactive = 1;
+       ts->signal_sent = 0;
+       ts->thread_count = 2;
+       init_completion(&ts->rx_restart_comp);
+       init_completion(&ts->tx_restart_comp);
+
+       return ts;
+}
+
+/*
+ * Clear @thread_clear bit(s) in the connection's thread set, and wake
+ * the peer thread if it is blocked waiting for that side to restart.
+ */
+void iscsi_set_thread_clear(struct iscsi_conn *conn, u8 thread_clear)
+{
+       struct iscsi_thread_set *ts = NULL;
+
+       if (!conn->thread_set) {
+               pr_err("struct iscsi_conn->thread_set is NULL\n");
+               return;
+       }
+       ts = conn->thread_set;
+
+       spin_lock_bh(&ts->ts_state_lock);
+       ts->thread_clear &= ~thread_clear;
+
+       if ((thread_clear & ISCSI_CLEAR_RX_THREAD) &&
+           (ts->blocked_threads & ISCSI_BLOCK_RX_THREAD))
+               complete(&ts->rx_restart_comp);
+       else if ((thread_clear & ISCSI_CLEAR_TX_THREAD) &&
+                (ts->blocked_threads & ISCSI_BLOCK_TX_THREAD))
+               complete(&ts->tx_restart_comp);
+       spin_unlock_bh(&ts->ts_state_lock);
+}
+
+/*
+ * Record that signal bit(s) @signal_sent have been delivered to the
+ * connection's thread set.  No-op (with an error log) when the
+ * connection has no thread set attached.
+ */
+void iscsi_set_thread_set_signal(struct iscsi_conn *conn, u8 signal_sent)
+{
+       struct iscsi_thread_set *ts = conn->thread_set;
+
+       if (!ts) {
+               pr_err("struct iscsi_conn->thread_set is NULL\n");
+               return;
+       }
+
+       spin_lock_bh(&ts->ts_state_lock);
+       ts->signal_sent |= signal_sent;
+       spin_unlock_bh(&ts->ts_state_lock);
+}
+
+/*
+ * Detach @conn from its thread set, forcing the *other* thread of the
+ * pair to restart: mark the set RESET, figure out which thread is
+ * calling us by comm name, signal the peer (once) if it has not yet
+ * cleared, and block on its restart completion.  The lock is dropped
+ * across each wait and re-taken afterwards.  Always returns 0; a NULL
+ * conn or thread set is a BUG().
+ */
+int iscsi_release_thread_set(struct iscsi_conn *conn)
+{
+       int thread_called = 0;
+       struct iscsi_thread_set *ts = NULL;
+
+       if (!conn || !conn->thread_set) {
+               pr_err("connection or thread set pointer is NULL\n");
+               BUG();
+       }
+       ts = conn->thread_set;
+
+       spin_lock_bh(&ts->ts_state_lock);
+       ts->status = ISCSI_THREAD_SET_RESET;
+
+       /* Identify the calling thread from its kthread comm name. */
+       if (!strncmp(current->comm, ISCSI_RX_THREAD_NAME,
+                       strlen(ISCSI_RX_THREAD_NAME)))
+               thread_called = ISCSI_RX_THREAD;
+       else if (!strncmp(current->comm, ISCSI_TX_THREAD_NAME,
+                       strlen(ISCSI_TX_THREAD_NAME)))
+               thread_called = ISCSI_TX_THREAD;
+
+       if (ts->rx_thread && (thread_called == ISCSI_TX_THREAD) &&
+          (ts->thread_clear & ISCSI_CLEAR_RX_THREAD)) {
+
+               if (!(ts->signal_sent & ISCSI_SIGNAL_RX_THREAD)) {
+                       send_sig(SIGINT, ts->rx_thread, 1);
+                       ts->signal_sent |= ISCSI_SIGNAL_RX_THREAD;
+               }
+               ts->blocked_threads |= ISCSI_BLOCK_RX_THREAD;
+               spin_unlock_bh(&ts->ts_state_lock);
+               wait_for_completion(&ts->rx_restart_comp);
+               spin_lock_bh(&ts->ts_state_lock);
+               ts->blocked_threads &= ~ISCSI_BLOCK_RX_THREAD;
+       }
+       if (ts->tx_thread && (thread_called == ISCSI_RX_THREAD) &&
+          (ts->thread_clear & ISCSI_CLEAR_TX_THREAD)) {
+
+               if (!(ts->signal_sent & ISCSI_SIGNAL_TX_THREAD)) {
+                       send_sig(SIGINT, ts->tx_thread, 1);
+                       ts->signal_sent |= ISCSI_SIGNAL_TX_THREAD;
+               }
+               ts->blocked_threads |= ISCSI_BLOCK_TX_THREAD;
+               spin_unlock_bh(&ts->ts_state_lock);
+               wait_for_completion(&ts->tx_restart_comp);
+               spin_lock_bh(&ts->ts_state_lock);
+               ts->blocked_threads &= ~ISCSI_BLOCK_TX_THREAD;
+       }
+
+       ts->conn = NULL;
+       ts->status = ISCSI_THREAD_SET_FREE;
+       spin_unlock_bh(&ts->ts_state_lock);
+
+       return 0;
+}
+
+/*
+ * Force both threads of an ACTIVE thread set to restart by sending
+ * SIGINT to each (at most once, tracked via signal_sent).  Returns 0
+ * on success, -1 when the connection has no thread set or the set is
+ * not in ISCSI_THREAD_SET_ACTIVE state.
+ */
+int iscsi_thread_set_force_reinstatement(struct iscsi_conn *conn)
+{
+       struct iscsi_thread_set *ts;
+
+       if (!conn->thread_set)
+               return -1;
+       ts = conn->thread_set;
+
+       spin_lock_bh(&ts->ts_state_lock);
+       if (ts->status != ISCSI_THREAD_SET_ACTIVE) {
+               spin_unlock_bh(&ts->ts_state_lock);
+               return -1;
+       }
+
+       if (ts->tx_thread && (!(ts->signal_sent & ISCSI_SIGNAL_TX_THREAD))) {
+               send_sig(SIGINT, ts->tx_thread, 1);
+               ts->signal_sent |= ISCSI_SIGNAL_TX_THREAD;
+       }
+       if (ts->rx_thread && (!(ts->signal_sent & ISCSI_SIGNAL_RX_THREAD))) {
+               send_sig(SIGINT, ts->rx_thread, 1);
+               ts->signal_sent |= ISCSI_SIGNAL_RX_THREAD;
+       }
+       spin_unlock_bh(&ts->ts_state_lock);
+
+       return 0;
+}
+
+/*
+ * Grow the pool when it is nearly exhausted: if only one inactive
+ * thread set remains, allocate one more pair.
+ */
+static void iscsi_check_to_add_additional_sets(void)
+{
+       int thread_sets_add;
+
+       spin_lock(&inactive_ts_lock);
+       thread_sets_add = iscsit_global->inactive_ts;
+       spin_unlock(&inactive_ts_lock);
+       if (thread_sets_add == 1)
+               iscsi_allocate_thread_sets(1);
+}
+
+/*
+ * Common post-wakeup check for RX/TX pre-handlers: returns -1 when the
+ * thread set has been told to die or the current task has a pending
+ * signal, 0 when it may proceed.
+ */
+static int iscsi_signal_thread_pre_handler(struct iscsi_thread_set *ts)
+{
+       int stop;
+
+       spin_lock_bh(&ts->ts_state_lock);
+       stop = (ts->status == ISCSI_THREAD_SET_DIE) ||
+              signal_pending(current);
+       spin_unlock_bh(&ts->ts_state_lock);
+
+       return stop ? -1 : 0;
+}
+
+/*
+ * RX thread park/unpark loop.  Returns the iscsi_conn the RX thread
+ * should service, or NULL when the thread must exit (interrupted wait
+ * or thread set marked DIE).  Also participates in the startup
+ * handshake with the TX thread and with iscsi_activate_thread_set().
+ */
+struct iscsi_conn *iscsi_rx_thread_pre_handler(struct iscsi_thread_set *ts)
+{
+       int ret;
+
+       spin_lock_bh(&ts->ts_state_lock);
+       if (ts->create_threads) {
+               /* First entry after kthread_run(): go straight to sleep. */
+               spin_unlock(&ts->ts_state_lock);
+               goto sleep;
+       }
+
+       flush_signals(current);
+
+       if (ts->delay_inactive && (--ts->thread_count == 0)) {
+               /* Last of the pair to park: move the set back to inactive. */
+               spin_unlock_bh(&ts->ts_state_lock);
+               iscsi_del_ts_from_active_list(ts);
+
+               if (!iscsit_global->in_shutdown)
+                       iscsi_deallocate_extra_thread_sets();
+
+               iscsi_add_ts_to_inactive_list(ts);
+               spin_lock_bh(&ts->ts_state_lock);
+       }
+
+       if ((ts->status == ISCSI_THREAD_SET_RESET) &&
+           (ts->thread_clear & ISCSI_CLEAR_RX_THREAD))
+               complete(&ts->rx_restart_comp);
+
+       ts->thread_clear &= ~ISCSI_CLEAR_RX_THREAD;
+       spin_unlock_bh(&ts->ts_state_lock);
+sleep:
+       ret = wait_for_completion_interruptible(&ts->rx_start_comp);
+       if (ret != 0)
+               return NULL;
+
+       if (iscsi_signal_thread_pre_handler(ts) < 0)
+               return NULL;
+
+       if (!ts->conn) {
+               pr_err("struct iscsi_thread_set->conn is NULL for"
+                       " thread_id: %d, going back to sleep\n", ts->thread_id);
+               goto sleep;
+       }
+       iscsi_check_to_add_additional_sets();
+       /*
+        * The RX Thread starts up the TX Thread and sleeps.
+        */
+       ts->thread_clear |= ISCSI_CLEAR_RX_THREAD;
+       complete(&ts->tx_start_comp);
+       wait_for_completion(&ts->tx_post_start_comp);
+
+       return ts->conn;
+}
+
+/*
+ * TX thread park/unpark loop, mirror of iscsi_rx_thread_pre_handler().
+ * Returns the iscsi_conn the TX thread should service, or NULL when
+ * the thread must exit.  Completes the startup handshake by waking
+ * both the RX thread and iscsi_activate_thread_set(), then marks the
+ * set ACTIVE.
+ *
+ * Fixed: the NULL-conn pr_err previously produced a doubled space
+ * ("NULL for  thread_id") from adjacent string literals; it now
+ * matches the RX variant's message.
+ */
+struct iscsi_conn *iscsi_tx_thread_pre_handler(struct iscsi_thread_set *ts)
+{
+       int ret;
+
+       spin_lock_bh(&ts->ts_state_lock);
+       if (ts->create_threads) {
+               spin_unlock_bh(&ts->ts_state_lock);
+               goto sleep;
+       }
+
+       flush_signals(current);
+
+       if (ts->delay_inactive && (--ts->thread_count == 0)) {
+               spin_unlock_bh(&ts->ts_state_lock);
+               iscsi_del_ts_from_active_list(ts);
+
+               if (!iscsit_global->in_shutdown)
+                       iscsi_deallocate_extra_thread_sets();
+
+               iscsi_add_ts_to_inactive_list(ts);
+               spin_lock_bh(&ts->ts_state_lock);
+       }
+       if ((ts->status == ISCSI_THREAD_SET_RESET) &&
+           (ts->thread_clear & ISCSI_CLEAR_TX_THREAD))
+               complete(&ts->tx_restart_comp);
+
+       ts->thread_clear &= ~ISCSI_CLEAR_TX_THREAD;
+       spin_unlock_bh(&ts->ts_state_lock);
+sleep:
+       ret = wait_for_completion_interruptible(&ts->tx_start_comp);
+       if (ret != 0)
+               return NULL;
+
+       if (iscsi_signal_thread_pre_handler(ts) < 0)
+               return NULL;
+
+       if (!ts->conn) {
+               pr_err("struct iscsi_thread_set->conn is NULL for"
+                       " thread_id: %d, going back to sleep\n",
+                       ts->thread_id);
+               goto sleep;
+       }
+
+       iscsi_check_to_add_additional_sets();
+       /*
+        * From the TX thread, up the tx_post_start_comp that the RX Thread is
+        * sleeping on in iscsi_rx_thread_pre_handler(), then up the
+        * rx_post_start_comp that iscsi_activate_thread_set() is sleeping on.
+        */
+       ts->thread_clear |= ISCSI_CLEAR_TX_THREAD;
+       complete(&ts->tx_post_start_comp);
+       complete(&ts->rx_post_start_comp);
+
+       spin_lock_bh(&ts->ts_state_lock);
+       ts->status = ISCSI_THREAD_SET_ACTIVE;
+       spin_unlock_bh(&ts->ts_state_lock);
+
+       return ts->conn;
+}
+
+/*
+ * Allocate the global thread_id bitmap (ISCSI_TS_BITMAP_BITS bits).
+ * Returns 0 on success or -ENOMEM.
+ *
+ * NOTE(review): the locks and lists re-initialized below are already
+ * statically initialized by DEFINE_SPINLOCK()/LIST_HEAD() at file
+ * scope, so the re-initialization is redundant (but harmless before
+ * first use).
+ */
+int iscsi_thread_set_init(void)
+{
+       int size;
+
+       iscsit_global->ts_bitmap_count = ISCSI_TS_BITMAP_BITS;
+
+       size = BITS_TO_LONGS(iscsit_global->ts_bitmap_count) * sizeof(long);
+       iscsit_global->ts_bitmap = kzalloc(size, GFP_KERNEL);
+       if (!iscsit_global->ts_bitmap) {
+               pr_err("Unable to allocate iscsit_global->ts_bitmap\n");
+               return -ENOMEM;
+       }
+
+       spin_lock_init(&active_ts_lock);
+       spin_lock_init(&inactive_ts_lock);
+       spin_lock_init(&ts_bitmap_lock);
+       INIT_LIST_HEAD(&active_ts_list);
+       INIT_LIST_HEAD(&inactive_ts_list);
+
+       return 0;
+}
+
+/* Release the global thread_id bitmap allocated by iscsi_thread_set_init(). */
+void iscsi_thread_set_free(void)
+{
+       kfree(iscsit_global->ts_bitmap);
+}
diff --git a/drivers/target/iscsi/iscsi_target_tq.h b/drivers/target/iscsi/iscsi_target_tq.h
new file mode 100644 (file)
index 0000000..26e6a95
--- /dev/null
@@ -0,0 +1,88 @@
+#ifndef ISCSI_THREAD_QUEUE_H
+#define ISCSI_THREAD_QUEUE_H
+
+/*
+ * Interface for the iSCSI target thread-set queue: pools of paired RX/TX
+ * kernel threads that are bound to an active struct iscsi_conn.
+ */
+
+/*
+ * Defines for thread sets.
+ */
+extern int iscsi_thread_set_force_reinstatement(struct iscsi_conn *);
+extern void iscsi_add_ts_to_inactive_list(struct iscsi_thread_set *);
+extern int iscsi_allocate_thread_sets(u32);
+extern void iscsi_deallocate_thread_sets(void);
+extern void iscsi_activate_thread_set(struct iscsi_conn *, struct iscsi_thread_set *);
+extern struct iscsi_thread_set *iscsi_get_thread_set(void);
+extern void iscsi_set_thread_clear(struct iscsi_conn *, u8);
+extern void iscsi_set_thread_set_signal(struct iscsi_conn *, u8);
+extern int iscsi_release_thread_set(struct iscsi_conn *);
+extern struct iscsi_conn *iscsi_rx_thread_pre_handler(struct iscsi_thread_set *);
+extern struct iscsi_conn *iscsi_tx_thread_pre_handler(struct iscsi_thread_set *);
+extern int iscsi_thread_set_init(void);
+extern void iscsi_thread_set_free(void);
+
+extern int iscsi_target_tx_thread(void *);
+extern int iscsi_target_rx_thread(void *);
+
+#define TARGET_THREAD_SET_COUNT                        4
+
+#define ISCSI_RX_THREAD                         1
+#define ISCSI_TX_THREAD                         2
+#define ISCSI_RX_THREAD_NAME                   "iscsi_trx"
+#define ISCSI_TX_THREAD_NAME                   "iscsi_ttx"
+/* Per-thread flag bits: RX is always 0x1, TX is always 0x2 in each group. */
+#define ISCSI_BLOCK_RX_THREAD                  0x1
+#define ISCSI_BLOCK_TX_THREAD                  0x2
+#define ISCSI_CLEAR_RX_THREAD                  0x1
+#define ISCSI_CLEAR_TX_THREAD                  0x2
+#define ISCSI_SIGNAL_RX_THREAD                 0x1
+#define ISCSI_SIGNAL_TX_THREAD                 0x2
+
+/* struct iscsi_thread_set->status */
+#define ISCSI_THREAD_SET_FREE                  1
+#define ISCSI_THREAD_SET_ACTIVE                        2
+#define ISCSI_THREAD_SET_DIE                   3
+#define ISCSI_THREAD_SET_RESET                 4
+#define ISCSI_THREAD_SET_DEALLOCATE_THREADS    5
+
+/* By default allow a maximum of 32K iSCSI connections */
+#define ISCSI_TS_BITMAP_BITS                   32768
+
+struct iscsi_thread_set {
+       /* flags used for blocking and restarting sets */
+       int     blocked_threads;
+       /* flag for creating threads */
+       int     create_threads;
+       /* flag for delaying re-adding to inactive list */
+       int     delay_inactive;
+       /* status for thread set (ISCSI_THREAD_SET_* above) */
+       int     status;
+       /* which threads have had signals sent */
+       int     signal_sent;
+       /* flag for which threads exited first */
+       int     thread_clear;
+       /* Active threads in the thread set */
+       int     thread_count;
+       /* Unique thread ID */
+       u32     thread_id;
+       /* pointer to connection if set is active */
+       struct iscsi_conn       *conn;
+       /* used for controlling ts state accesses */
+       spinlock_t      ts_state_lock;
+       /* Used for rx side post startup */
+       struct completion       rx_post_start_comp;
+       /* Used for tx side post startup */
+       struct completion       tx_post_start_comp;
+       /* used for restarting thread queue */
+       struct completion       rx_restart_comp;
+       /* used for restarting thread queue */
+       struct completion       tx_restart_comp;
+       /* used for normal unused blocking */
+       struct completion       rx_start_comp;
+       /* used for normal unused blocking */
+       struct completion       tx_start_comp;
+       /* OS descriptor for rx thread */
+       struct task_struct      *rx_thread;
+       /* OS descriptor for tx thread */
+       struct task_struct      *tx_thread;
+       /* list head for active/inactive thread-set lists */
+       struct list_head        ts_list;
+};
+
+#endif   /*** ISCSI_THREAD_QUEUE_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
new file mode 100644 (file)
index 0000000..a1acb01
--- /dev/null
@@ -0,0 +1,1819 @@
+/*******************************************************************************
+ * This file contains the iSCSI Target specific utility functions.
+ *
+ * \u00a9 Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/list.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+#include <target/target_core_tmr.h>
+#include <target/target_core_fabric_ops.h>
+#include <target/target_core_configfs.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_parameters.h"
+#include "iscsi_target_seq_pdu_list.h"
+#include "iscsi_target_datain_values.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_erl2.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_tq.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+
+/*
+ * Debug helper: hex-dump the first @len bytes of @buff via pr_debug(),
+ * 16 bytes per output line, prefixed with the source line number.
+ *
+ * NOTE(review): the body is a bare brace block rather than the idiomatic
+ * do { ... } while (0), so it can mis-parse after an un-braced if/else;
+ * and the trailing "(len + 1) % 16" test looks like it was meant to be
+ * "len % 16" -- confirm before reusing this macro.
+ */
+#define PRINT_BUFF(buff, len)                                  \
+{                                                              \
+       int zzz;                                                \
+                                                               \
+       pr_debug("%d:\n", __LINE__);                            \
+       for (zzz = 0; zzz < len; zzz++) {                       \
+               if (zzz % 16 == 0) {                            \
+                       if (zzz)                                \
+                               pr_debug("\n");                 \
+                       pr_debug("%4i: ", zzz);                 \
+               }                                               \
+               pr_debug("%02x ", (unsigned char) (buff)[zzz]); \
+       }                                                       \
+       if ((len + 1) % 16)                                     \
+               pr_debug("\n");                                 \
+}
+
+extern struct list_head g_tiqn_list;
+extern spinlock_t tiqn_lock;
+
+/*
+ *     Allocate a new R2T descriptor, append it to cmd->cmd_r2t_list and
+ *     queue an ISTATE_SEND_R2T for the TX thread.
+ *
+ *     Called with cmd->r2t_lock held; returns with it held.  Note the
+ *     lock is dropped and reacquired around the immediate-queue insert,
+ *     so the list may change across that window.
+ *
+ *     @recovery: non-zero if this R2T is part of error recovery.
+ *     @r2t_sn:   explicit R2T sequence number, or 0 to consume the
+ *                command's next r2t_sn.
+ *
+ *     Returns 0 on success, -1 on allocation failure.
+ */
+int iscsit_add_r2t_to_list(
+       struct iscsi_cmd *cmd,
+       u32 offset,
+       u32 xfer_len,
+       int recovery,
+       u32 r2t_sn)
+{
+       struct iscsi_r2t *r2t;
+
+       /* GFP_ATOMIC: caller holds cmd->r2t_lock (a bh-disabling spinlock) */
+       r2t = kmem_cache_zalloc(lio_r2t_cache, GFP_ATOMIC);
+       if (!r2t) {
+               pr_err("Unable to allocate memory for struct iscsi_r2t.\n")
;
+               return -1;
+       }
+       INIT_LIST_HEAD(&r2t->r2t_list);
+
+       r2t->recovery_r2t = recovery;
+       r2t->r2t_sn = (!r2t_sn) ? cmd->r2t_sn++ : r2t_sn;
+       r2t->offset = offset;
+       r2t->xfer_len = xfer_len;
+       list_add_tail(&r2t->r2t_list, &cmd->cmd_r2t_list);
+       spin_unlock_bh(&cmd->r2t_lock);
+
+       iscsit_add_cmd_to_immediate_queue(cmd, cmd->conn, ISTATE_SEND_R2T);
+
+       spin_lock_bh(&cmd->r2t_lock);
+       return 0;
+}
+
+/*
+ * Find the R2T on cmd->cmd_r2t_list whose [offset, offset + xfer_len)
+ * window fully covers the requested [offset, offset + length) range.
+ * Returns the matching R2T, or NULL (with an error logged) if none
+ * covers the range.
+ */
+struct iscsi_r2t *iscsit_get_r2t_for_eos(
+       struct iscsi_cmd *cmd,
+       u32 offset,
+       u32 length)
+{
+       struct iscsi_r2t *r2t_entry;
+       u32 end = offset + length;
+
+       spin_lock_bh(&cmd->r2t_lock);
+       list_for_each_entry(r2t_entry, &cmd->cmd_r2t_list, r2t_list) {
+               if (r2t_entry->offset > offset)
+                       continue;
+               if ((r2t_entry->offset + r2t_entry->xfer_len) < end)
+                       continue;
+
+               spin_unlock_bh(&cmd->r2t_lock);
+               return r2t_entry;
+       }
+       spin_unlock_bh(&cmd->r2t_lock);
+
+       pr_err("Unable to locate R2T for Offset: %u, Length:"
+                       " %u\n", offset, length);
+       return NULL;
+}
+
+/*
+ * Return the first R2T on cmd->cmd_r2t_list that has not yet been sent,
+ * or NULL (with an error logged) if every R2T is already out.
+ */
+struct iscsi_r2t *iscsit_get_r2t_from_list(struct iscsi_cmd *cmd)
+{
+       struct iscsi_r2t *r2t_entry;
+
+       spin_lock_bh(&cmd->r2t_lock);
+       list_for_each_entry(r2t_entry, &cmd->cmd_r2t_list, r2t_list) {
+               if (r2t_entry->sent_r2t)
+                       continue;
+
+               spin_unlock_bh(&cmd->r2t_lock);
+               return r2t_entry;
+       }
+       spin_unlock_bh(&cmd->r2t_lock);
+
+       pr_err("Unable to locate next R2T to send for ITT:"
+                       " 0x%08x.\n", cmd->init_task_tag);
+       return NULL;
+}
+
+/*
+ *     Unlink @r2t from its command's R2T list and return it to the
+ *     lio_r2t_cache slab.  Called with cmd->r2t_lock held.
+ */
+void iscsit_free_r2t(struct iscsi_r2t *r2t, struct iscsi_cmd *cmd)
+{
+       list_del(&r2t->r2t_list);
+       kmem_cache_free(lio_r2t_cache, r2t);
+}
+
+/* Free every R2T still queued on @cmd under cmd->r2t_lock. */
+void iscsit_free_r2ts_from_list(struct iscsi_cmd *cmd)
+{
+       struct iscsi_r2t *r2t_entry;
+
+       spin_lock_bh(&cmd->r2t_lock);
+       while (!list_empty(&cmd->cmd_r2t_list)) {
+               r2t_entry = list_entry(cmd->cmd_r2t_list.next,
+                               struct iscsi_r2t, r2t_list);
+               iscsit_free_r2t(r2t_entry, cmd);
+       }
+       spin_unlock_bh(&cmd->r2t_lock);
+}
+
+/*
+ * Allocate and initialize a zeroed struct iscsi_cmd bound to @conn:
+ * lists, completion and per-command spinlocks are set up here.
+ *
+ * May be called from software interrupt (timer) context for allocating
+ * iSCSI NopINs, hence the caller-supplied @gfp_mask.
+ *
+ * Returns the new command, or NULL on allocation failure.
+ */
+struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *conn, gfp_t gfp_mask)
+{
+       struct iscsi_cmd *cmd;
+
+       cmd = kmem_cache_zalloc(lio_cmd_cache, gfp_mask);
+       if (!cmd) {
+               pr_err("Unable to allocate memory for struct iscsi_cmd.\n");
+               return NULL;
+       }
+
+       cmd->conn       = conn;
+       INIT_LIST_HEAD(&cmd->i_list);
+       INIT_LIST_HEAD(&cmd->datain_list);
+       INIT_LIST_HEAD(&cmd->cmd_r2t_list);
+       init_completion(&cmd->reject_comp);
+       spin_lock_init(&cmd->datain_lock);
+       spin_lock_init(&cmd->dataout_timeout_lock);
+       spin_lock_init(&cmd->istate_lock);
+       spin_lock_init(&cmd->error_lock);
+       spin_lock_init(&cmd->r2t_lock);
+
+       return cmd;
+}
+
+/*
+ * Allocate an iscsi_cmd for an incoming SCSI CDB and initialize its
+ * embedded struct se_cmd via target_core_mod.  Maps the iSCSI task
+ * attribute onto the corresponding SAM task attribute, defaulting to
+ * MSG_SIMPLE_TAG for unknown values.
+ *
+ * Called from iscsi_handle_scsi_cmd().  Returns NULL on allocation
+ * failure.
+ */
+struct iscsi_cmd *iscsit_allocate_se_cmd(
+       struct iscsi_conn *conn,
+       u32 data_length,
+       int data_direction,
+       int iscsi_task_attr)
+{
+       struct iscsi_cmd *cmd;
+       int sam_task_attr;
+
+       cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+       if (!cmd)
+               return NULL;
+
+       cmd->data_direction = data_direction;
+       cmd->data_length = data_length;
+
+       /* Map the iSCSI task attribute onto its SAM equivalent. */
+       switch (iscsi_task_attr) {
+       case ISCSI_ATTR_UNTAGGED:
+       case ISCSI_ATTR_SIMPLE:
+               sam_task_attr = MSG_SIMPLE_TAG;
+               break;
+       case ISCSI_ATTR_ORDERED:
+               sam_task_attr = MSG_ORDERED_TAG;
+               break;
+       case ISCSI_ATTR_HEAD_OF_QUEUE:
+               sam_task_attr = MSG_HEAD_TAG;
+               break;
+       case ISCSI_ATTR_ACA:
+               sam_task_attr = MSG_ACA_TAG;
+               break;
+       default:
+               pr_debug("Unknown iSCSI Task Attribute: 0x%02x, using"
+                       " MSG_SIMPLE_TAG\n", iscsi_task_attr);
+               sam_task_attr = MSG_SIMPLE_TAG;
+               break;
+       }
+
+       /*
+        * Initialize struct se_cmd descriptor from target_core_mod infrastructure
+        */
+       transport_init_se_cmd(&cmd->se_cmd, &lio_target_fabric_configfs->tf_ops,
+                       conn->sess->se_sess, data_length, data_direction,
+                       sam_task_attr, &cmd->sense_buffer[0]);
+       return cmd;
+}
+
+/*
+ * Allocate an iscsi_cmd plus its iscsi_tmr_req for an incoming Task
+ * Management Function request, and (except for TASK_REASSIGN, which is
+ * handled entirely inside the iSCSI fabric module) set up the embedded
+ * se_cmd and core TMR request.
+ *
+ * Returns the new command, or NULL on allocation failure or an unknown
+ * TMR function code.
+ */
+struct iscsi_cmd *iscsit_allocate_se_cmd_for_tmr(
+       struct iscsi_conn *conn,
+       u8 function)
+{
+       struct iscsi_cmd *cmd;
+       struct se_cmd *se_cmd;
+       u8 tcm_function;
+
+       cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+       if (!cmd)
+               return NULL;
+
+       cmd->data_direction = DMA_NONE;
+
+       cmd->tmr_req = kzalloc(sizeof(struct iscsi_tmr_req), GFP_KERNEL);
+       if (!cmd->tmr_req) {
+               pr_err("Unable to allocate memory for"
+                       " Task Management command!\n");
+               /*
+                * Fix: release the iscsi_cmd allocated above instead of
+                * leaking it on this early failure path.
+                */
+               iscsit_release_cmd(cmd);
+               return NULL;
+       }
+       /*
+        * TASK_REASSIGN for ERL=2 / connection stays inside of
+        * LIO-Target $FABRIC_MOD
+        */
+       if (function == ISCSI_TM_FUNC_TASK_REASSIGN)
+               return cmd;
+
+       se_cmd = &cmd->se_cmd;
+       /*
+        * Initialize struct se_cmd descriptor from target_core_mod infrastructure
+        */
+       transport_init_se_cmd(se_cmd, &lio_target_fabric_configfs->tf_ops,
+                               conn->sess->se_sess, 0, DMA_NONE,
+                               MSG_SIMPLE_TAG, &cmd->sense_buffer[0]);
+
+       switch (function) {
+       case ISCSI_TM_FUNC_ABORT_TASK:
+               tcm_function = TMR_ABORT_TASK;
+               break;
+       case ISCSI_TM_FUNC_ABORT_TASK_SET:
+               tcm_function = TMR_ABORT_TASK_SET;
+               break;
+       case ISCSI_TM_FUNC_CLEAR_ACA:
+               tcm_function = TMR_CLEAR_ACA;
+               break;
+       case ISCSI_TM_FUNC_CLEAR_TASK_SET:
+               tcm_function = TMR_CLEAR_TASK_SET;
+               break;
+       case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET:
+               tcm_function = TMR_LUN_RESET;
+               break;
+       case ISCSI_TM_FUNC_TARGET_WARM_RESET:
+               tcm_function = TMR_TARGET_WARM_RESET;
+               break;
+       case ISCSI_TM_FUNC_TARGET_COLD_RESET:
+               tcm_function = TMR_TARGET_COLD_RESET;
+               break;
+       default:
+               pr_err("Unknown iSCSI TMR Function:"
+                       " 0x%02x\n", function);
+               goto out;
+       }
+
+       se_cmd->se_tmr_req = core_tmr_alloc_req(se_cmd,
+                               (void *)cmd->tmr_req, tcm_function);
+       if (!se_cmd->se_tmr_req)
+               goto out;
+
+       cmd->tmr_req->se_tmr_req = se_cmd->se_tmr_req;
+
+       return cmd;
+out:
+       /*
+        * Fix: free the embedded se_cmd *before* releasing the iscsi_cmd
+        * that contains it -- the original order freed cmd first and then
+        * dereferenced &cmd->se_cmd.  se_cmd is always assigned before any
+        * goto out above, so no NULL check is needed here.
+        */
+       transport_free_se_cmd(se_cmd);
+       iscsit_release_cmd(cmd);
+       return NULL;
+}
+
+/*
+ * Decide whether a PDU/sequence list must be built for @cmd and, if so,
+ * populate a struct iscsi_build_list and hand it to iscsit_do_build_list().
+ *
+ * No list is needed (returns 0 immediately) when both DataSequenceInOrder
+ * and DataPDUInOrder are negotiated, or when the command moves no data.
+ * Randomized offsets are applied according to the node attributes.
+ *
+ * Returns the result of iscsit_do_build_list(), or 0 when no list is
+ * required.
+ */
+int iscsit_decide_list_to_build(
+       struct iscsi_cmd *cmd,
+       u32 immediate_data_length)
+{
+       struct iscsi_build_list bl;
+       struct iscsi_conn *conn = cmd->conn;
+       struct iscsi_session *sess = conn->sess;
+       struct iscsi_node_attrib *na;
+
+       if (sess->sess_ops->DataSequenceInOrder &&
+           sess->sess_ops->DataPDUInOrder)
+               return 0;
+
+       if (cmd->data_direction == DMA_NONE)
+               return 0;
+
+       na = iscsit_tpg_get_node_attrib(sess);
+       memset(&bl, 0, sizeof(struct iscsi_build_list));
+
+       if (cmd->data_direction == DMA_FROM_DEVICE) {
+               /* READ: DataIN PDUs, optionally with randomized offsets. */
+               bl.data_direction = ISCSI_PDU_READ;
+               bl.type = PDULIST_NORMAL;
+               if (na->random_datain_pdu_offsets)
+                       bl.randomize |= RANDOM_DATAIN_PDU_OFFSETS;
+               if (na->random_datain_seq_offsets)
+                       bl.randomize |= RANDOM_DATAIN_SEQ_OFFSETS;
+       } else {
+               /* WRITE: list type depends on immediate/unsolicited data. */
+               bl.data_direction = ISCSI_PDU_WRITE;
+               bl.immediate_data_length = immediate_data_length;
+               if (na->random_r2t_offsets)
+                       bl.randomize |= RANDOM_R2T_OFFSETS;
+
+               if (!cmd->immediate_data && !cmd->unsolicited_data)
+                       bl.type = PDULIST_NORMAL;
+               else if (cmd->immediate_data && !cmd->unsolicited_data)
+                       bl.type = PDULIST_IMMEDIATE;
+               else if (!cmd->immediate_data && cmd->unsolicited_data)
+                       bl.type = PDULIST_UNSOLICITED;
+               else if (cmd->immediate_data && cmd->unsolicited_data)
+                       bl.type = PDULIST_IMMEDIATE_AND_UNSOLICITED;
+       }
+
+       return iscsit_do_build_list(cmd, &bl);
+}
+
+/*
+ * Return the sequence descriptor in cmd->seq_list whose seq_send_order
+ * matches @seq_send_order, or NULL if there is no match.
+ */
+struct iscsi_seq *iscsit_get_seq_holder_for_datain(
+       struct iscsi_cmd *cmd,
+       u32 seq_send_order)
+{
+       u32 idx = 0;
+
+       while (idx < cmd->seq_count) {
+               if (cmd->seq_list[idx].seq_send_order == seq_send_order)
+                       return &cmd->seq_list[idx];
+               idx++;
+       }
+
+       return NULL;
+}
+
+/*
+ * Find the next SEQTYPE_NORMAL sequence whose seq_send_order matches the
+ * command's current seq_send_order, advancing cmd->seq_send_order on a
+ * hit.  Returns NULL if the list is missing or no sequence matches.
+ */
+struct iscsi_seq *iscsit_get_seq_holder_for_r2t(struct iscsi_cmd *cmd)
+{
+       u32 i;
+
+       if (!cmd->seq_list) {
+               pr_err("struct iscsi_cmd->seq_list is NULL!\n");
+               return NULL;
+       }
+
+       for (i = 0; i < cmd->seq_count; i++) {
+               if (cmd->seq_list[i].type != SEQTYPE_NORMAL)
+                       continue;
+               if (cmd->seq_list[i].seq_send_order == cmd->seq_send_order) {
+                       /* Consume this send-order slot for the next caller. */
+                       cmd->seq_send_order++;
+                       return &cmd->seq_list[i];
+               }
+       }
+
+       return NULL;
+}
+
+/*
+ * Look up the R2T on cmd->cmd_r2t_list with the given R2T sequence
+ * number.  Returns the match, or NULL if none exists.
+ */
+struct iscsi_r2t *iscsit_get_holder_for_r2tsn(
+       struct iscsi_cmd *cmd,
+       u32 r2t_sn)
+{
+       struct iscsi_r2t *r2t_entry;
+
+       spin_lock_bh(&cmd->r2t_lock);
+       list_for_each_entry(r2t_entry, &cmd->cmd_r2t_list, r2t_list) {
+               if (r2t_entry->r2t_sn != r2t_sn)
+                       continue;
+
+               spin_unlock_bh(&cmd->r2t_lock);
+               return r2t_entry;
+       }
+       spin_unlock_bh(&cmd->r2t_lock);
+
+       return NULL;
+}
+
+/*
+ * Classify a received CmdSN against the session's ExpCmdSN/MaxCmdSN
+ * window using serial-number arithmetic (iscsi_sna_gt):
+ *
+ *   > MaxCmdSN          -> CMDSN_ERROR_CANNOT_RECOVER (protocol error)
+ *   == ExpCmdSN         -> CMDSN_NORMAL_OPERATION (ExpCmdSN advanced)
+ *   > ExpCmdSN          -> CMDSN_HIGHER_THAN_EXP (out-of-order, hold)
+ *   < ExpCmdSN          -> CMDSN_LOWER_THAN_EXP (stale, ignore)
+ *
+ * Caller must serialize against other CmdSN updates (holds
+ * sess->cmdsn_mutex in iscsit_sequence_cmd()).
+ */
+static inline int iscsit_check_received_cmdsn(struct iscsi_session *sess, u32 cmdsn)
+{
+       int ret;
+
+       /*
+        * This is the proper method of checking received CmdSN against
+        * ExpCmdSN and MaxCmdSN values, as well as accounting for out
+        * or order CmdSNs due to multiple connection sessions and/or
+        * CRC failures.
+        */
+       if (iscsi_sna_gt(cmdsn, sess->max_cmd_sn)) {
+               pr_err("Received CmdSN: 0x%08x is greater than"
+                      " MaxCmdSN: 0x%08x, protocol error.\n", cmdsn,
+                      sess->max_cmd_sn);
+               ret = CMDSN_ERROR_CANNOT_RECOVER;
+
+       } else if (cmdsn == sess->exp_cmd_sn) {
+               sess->exp_cmd_sn++;
+               pr_debug("Received CmdSN matches ExpCmdSN,"
+                     " incremented ExpCmdSN to: 0x%08x\n",
+                     sess->exp_cmd_sn);
+               ret = CMDSN_NORMAL_OPERATION;
+
+       } else if (iscsi_sna_gt(cmdsn, sess->exp_cmd_sn)) {
+               pr_debug("Received CmdSN: 0x%08x is greater"
+                     " than ExpCmdSN: 0x%08x, not acknowledging.\n",
+                     cmdsn, sess->exp_cmd_sn);
+               ret = CMDSN_HIGHER_THAN_EXP;
+
+       } else {
+               pr_err("Received CmdSN: 0x%08x is less than"
+                      " ExpCmdSN: 0x%08x, ignoring.\n", cmdsn,
+                      sess->exp_cmd_sn);
+               ret = CMDSN_LOWER_THAN_EXP;
+       }
+
+       return ret;
+}
+
+/*
+ * Commands may be received out of order if MC/S is in use.
+ * Ensure they are executed in CmdSN order.
+ *
+ * Under sess->cmdsn_mutex, classify @cmdsn and either execute the
+ * command now (also draining any queued out-of-order CmdSNs), park it
+ * on the out-of-order list, or schedule its removal when stale.
+ *
+ * Returns the iscsit_execute_cmd()/iscsit_handle_ooo_cmdsn() result,
+ * or the CMDSN_* classification for the lower-than-expected and error
+ * cases.
+ */
+int iscsit_sequence_cmd(
+       struct iscsi_conn *conn,
+       struct iscsi_cmd *cmd,
+       u32 cmdsn)
+{
+       int ret;
+       int cmdsn_ret;
+
+       mutex_lock(&conn->sess->cmdsn_mutex);
+
+       cmdsn_ret = iscsit_check_received_cmdsn(conn->sess, cmdsn);
+       switch (cmdsn_ret) {
+       case CMDSN_NORMAL_OPERATION:
+               ret = iscsit_execute_cmd(cmd, 0);
+               /* In-order arrival may unblock queued out-of-order CmdSNs. */
+               if ((ret >= 0) && !list_empty(&conn->sess->sess_ooo_cmdsn_list))
+                       iscsit_execute_ooo_cmdsns(conn->sess);
+               break;
+       case CMDSN_HIGHER_THAN_EXP:
+               ret = iscsit_handle_ooo_cmdsn(conn->sess, cmd, cmdsn);
+               break;
+       case CMDSN_LOWER_THAN_EXP:
+               /* Stale CmdSN: hand the command to the TX thread for removal. */
+               cmd->i_state = ISTATE_REMOVE;
+               iscsit_add_cmd_to_immediate_queue(cmd, conn, cmd->i_state);
+               ret = cmdsn_ret;
+               break;
+       default:
+               ret = cmdsn_ret;
+               break;
+       }
+       mutex_unlock(&conn->sess->cmdsn_mutex);
+
+       return ret;
+}
+
+/*
+ * Validate an unsolicited DataOut PDU (@buf points at the iSCSI header)
+ * against the negotiated InitialR2T/FirstBurstLength parameters:
+ *   - unsolicited data is illegal when InitialR2T=Yes;
+ *   - the running first-burst total must not exceed FirstBurstLength;
+ *   - on the final PDU of the burst, the total must equal either the
+ *     expected transfer length or FirstBurstLength.
+ *
+ * Returns 0 when acceptable, -1 after sending a CHECK_CONDITION on any
+ * violation.
+ */
+int iscsit_check_unsolicited_dataout(struct iscsi_cmd *cmd, unsigned char *buf)
+{
+       struct iscsi_conn *conn = cmd->conn;
+       struct se_cmd *se_cmd = &cmd->se_cmd;
+       struct iscsi_data *hdr = (struct iscsi_data *) buf;
+       u32 payload_length = ntoh24(hdr->dlength);
+
+       if (conn->sess->sess_ops->InitialR2T) {
+               pr_err("Received unexpected unsolicited data"
+                       " while InitialR2T=Yes, protocol error.\n");
+               transport_send_check_condition_and_sense(se_cmd,
+                               TCM_UNEXPECTED_UNSOLICITED_DATA, 0);
+               return -1;
+       }
+
+       if ((cmd->first_burst_len + payload_length) >
+            conn->sess->sess_ops->FirstBurstLength) {
+               pr_err("Total %u bytes exceeds FirstBurstLength: %u"
+                       " for this Unsolicited DataOut Burst.\n",
+                       (cmd->first_burst_len + payload_length),
+                               conn->sess->sess_ops->FirstBurstLength);
+               transport_send_check_condition_and_sense(se_cmd,
+                               TCM_INCORRECT_AMOUNT_OF_DATA, 0);
+               return -1;
+       }
+
+       /* Totals are only finalized on the burst's F-bit (final) PDU. */
+       if (!(hdr->flags & ISCSI_FLAG_CMD_FINAL))
+               return 0;
+
+       if (((cmd->first_burst_len + payload_length) != cmd->data_length) &&
+           ((cmd->first_burst_len + payload_length) !=
+             conn->sess->sess_ops->FirstBurstLength)) {
+               pr_err("Unsolicited non-immediate data received %u"
+                       " does not equal FirstBurstLength: %u, and does"
+                       " not equal ExpXferLen %u.\n",
+                       (cmd->first_burst_len + payload_length),
+                       conn->sess->sess_ops->FirstBurstLength, cmd->data_length);
+               transport_send_check_condition_and_sense(se_cmd,
+                               TCM_INCORRECT_AMOUNT_OF_DATA, 0);
+               return -1;
+       }
+       return 0;
+}
+
+/*
+ * Look up a command on @conn by Initiator Task Tag.
+ * Returns the matching iscsi_cmd (lock released), or NULL with an error
+ * logged if no command carries @init_task_tag.
+ */
+struct iscsi_cmd *iscsit_find_cmd_from_itt(
+       struct iscsi_conn *conn,
+       u32 init_task_tag)
+{
+       struct iscsi_cmd *cmd;
+
+       spin_lock_bh(&conn->cmd_lock);
+       list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) {
+               if (cmd->init_task_tag == init_task_tag) {
+                       spin_unlock_bh(&conn->cmd_lock);
+                       return cmd;
+               }
+       }
+       spin_unlock_bh(&conn->cmd_lock);
+
+       /* Fix: add the missing trailing newline to the log message. */
+       pr_err("Unable to locate ITT: 0x%08x on CID: %hu\n",
+                       init_task_tag, conn->cid);
+       return NULL;
+}
+
+/*
+ * Look up a command on @conn by Initiator Task Tag.  Unlike
+ * iscsit_find_cmd_from_itt(), a miss additionally dumps (discards)
+ * @length bytes of the PDU payload from the socket so the connection
+ * stream stays in sync.  Returns the command or NULL.
+ */
+struct iscsi_cmd *iscsit_find_cmd_from_itt_or_dump(
+       struct iscsi_conn *conn,
+       u32 init_task_tag,
+       u32 length)
+{
+       struct iscsi_cmd *cmd;
+
+       spin_lock_bh(&conn->cmd_lock);
+       list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) {
+               if (cmd->init_task_tag == init_task_tag) {
+                       spin_unlock_bh(&conn->cmd_lock);
+                       return cmd;
+               }
+       }
+       spin_unlock_bh(&conn->cmd_lock);
+
+       pr_err("Unable to locate ITT: 0x%08x on CID: %hu,"
+                       " dumping payload\n", init_task_tag, conn->cid);
+       if (length)
+               iscsit_dump_data_payload(conn, length, 1);
+
+       return NULL;
+}
+
+/*
+ * Look up a command on @conn by Target Transfer Tag.
+ * Returns the matching iscsi_cmd, or NULL with an error logged.
+ */
+struct iscsi_cmd *iscsit_find_cmd_from_ttt(
+       struct iscsi_conn *conn,
+       u32 targ_xfer_tag)
+{
+       struct iscsi_cmd *cmd = NULL;
+
+       spin_lock_bh(&conn->cmd_lock);
+       list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) {
+               if (cmd->targ_xfer_tag == targ_xfer_tag) {
+                       spin_unlock_bh(&conn->cmd_lock);
+                       return cmd;
+               }
+       }
+       spin_unlock_bh(&conn->cmd_lock);
+
+       pr_err("Unable to locate TTT: 0x%08x on CID: %hu\n",
+                       targ_xfer_tag, conn->cid);
+       return NULL;
+}
+
+/*
+ * Search both connection-recovery lists of @sess for a command with the
+ * given Initiator Task Tag, returning the command and its recovery
+ * entry through @cmd_ptr/@cr_ptr.
+ *
+ * Returns:
+ *   -2  found on an inactive connection recovery entry
+ *    0  found on an active connection recovery entry (ready to reassign)
+ *   -1  not found
+ */
+int iscsit_find_cmd_for_recovery(
+       struct iscsi_session *sess,
+       struct iscsi_cmd **cmd_ptr,
+       struct iscsi_conn_recovery **cr_ptr,
+       u32 init_task_tag)
+{
+       struct iscsi_cmd *cmd = NULL;
+       struct iscsi_conn_recovery *cr;
+       /*
+        * Scan through the inactive connection recovery list's command list.
+        * If init_task_tag matches, the command is still outstanding there.
+        */
+       spin_lock(&sess->cr_i_lock);
+       list_for_each_entry(cr, &sess->cr_inactive_list, cr_list) {
+               spin_lock(&cr->conn_recovery_cmd_lock);
+               list_for_each_entry(cmd, &cr->conn_recovery_cmd_list, i_list) {
+                       if (cmd->init_task_tag == init_task_tag) {
+                               spin_unlock(&cr->conn_recovery_cmd_lock);
+                               spin_unlock(&sess->cr_i_lock);
+
+                               *cr_ptr = cr;
+                               *cmd_ptr = cmd;
+                               return -2;
+                       }
+               }
+               spin_unlock(&cr->conn_recovery_cmd_lock);
+       }
+       spin_unlock(&sess->cr_i_lock);
+       /*
+        * Scan through the active connection recovery list's command list.
+        * If init_task_tag matches the command is ready to be reassigned.
+        */
+       spin_lock(&sess->cr_a_lock);
+       list_for_each_entry(cr, &sess->cr_active_list, cr_list) {
+               spin_lock(&cr->conn_recovery_cmd_lock);
+               list_for_each_entry(cmd, &cr->conn_recovery_cmd_list, i_list) {
+                       if (cmd->init_task_tag == init_task_tag) {
+                               spin_unlock(&cr->conn_recovery_cmd_lock);
+                               spin_unlock(&sess->cr_a_lock);
+
+                               *cr_ptr = cr;
+                               *cmd_ptr = cmd;
+                               return 0;
+                       }
+               }
+               spin_unlock(&cr->conn_recovery_cmd_lock);
+       }
+       spin_unlock(&sess->cr_a_lock);
+
+       return -1;
+}
+
+/*
+ * Queue @cmd on the connection's immediate queue in @state, flag the
+ * queue as pending and wake the TX thread.  Allocation failure is
+ * logged and silently dropped (no error is returned to the caller).
+ */
+void iscsit_add_cmd_to_immediate_queue(
+       struct iscsi_cmd *cmd,
+       struct iscsi_conn *conn,
+       u8 state)
+{
+       struct iscsi_queue_req *qr;
+
+       /* GFP_ATOMIC: may be called from timer/softirq context. */
+       qr = kmem_cache_zalloc(lio_qr_cache, GFP_ATOMIC);
+       if (!qr) {
+               pr_err("Unable to allocate memory for"
+                               " struct iscsi_queue_req\n");
+               return;
+       }
+       INIT_LIST_HEAD(&qr->qr_list);
+       qr->cmd = cmd;
+       qr->state = state;
+
+       spin_lock_bh(&conn->immed_queue_lock);
+       list_add_tail(&qr->qr_list, &conn->immed_queue_list);
+       atomic_inc(&cmd->immed_queue_count);
+       atomic_set(&conn->check_immediate_queue, 1);
+       spin_unlock_bh(&conn->immed_queue_lock);
+
+       wake_up_process(conn->thread_set->tx_thread);
+}
+
+/*
+ * Dequeue the head of the connection's immediate queue.
+ * Returns the queue request (caller frees it to lio_qr_cache), or NULL
+ * when the queue is empty.
+ */
+struct iscsi_queue_req *iscsit_get_cmd_from_immediate_queue(struct iscsi_conn *conn)
+{
+       struct iscsi_queue_req *qr;
+
+       spin_lock_bh(&conn->immed_queue_lock);
+       if (list_empty(&conn->immed_queue_list)) {
+               spin_unlock_bh(&conn->immed_queue_lock);
+               return NULL;
+       }
+       /*
+        * Use list_first_entry() rather than the original
+        * list_for_each_entry() + immediate break to grab the head.
+        */
+       qr = list_first_entry(&conn->immed_queue_list,
+                               struct iscsi_queue_req, qr_list);
+
+       list_del(&qr->qr_list);
+       if (qr->cmd)
+               atomic_dec(&qr->cmd->immed_queue_count);
+       spin_unlock_bh(&conn->immed_queue_lock);
+
+       return qr;
+}
+
+/*
+ * Remove every immediate-queue entry that references @cmd, freeing each
+ * entry back to lio_qr_cache.  A non-zero residual count afterwards is
+ * logged as it indicates queue-accounting breakage.
+ */
+static void iscsit_remove_cmd_from_immediate_queue(
+       struct iscsi_cmd *cmd,
+       struct iscsi_conn *conn)
+{
+       struct iscsi_queue_req *qr, *qr_tmp;
+
+       spin_lock_bh(&conn->immed_queue_lock);
+       /* Fast path: nothing queued for this command. */
+       if (!atomic_read(&cmd->immed_queue_count)) {
+               spin_unlock_bh(&conn->immed_queue_lock);
+               return;
+       }
+
+       list_for_each_entry_safe(qr, qr_tmp, &conn->immed_queue_list, qr_list) {
+               if (qr->cmd != cmd)
+                       continue;
+
+               atomic_dec(&qr->cmd->immed_queue_count);
+               list_del(&qr->qr_list);
+               kmem_cache_free(lio_qr_cache, qr);
+       }
+       spin_unlock_bh(&conn->immed_queue_lock);
+
+       if (atomic_read(&cmd->immed_queue_count)) {
+               pr_err("ITT: 0x%08x immed_queue_count: %d\n",
+                       cmd->init_task_tag,
+                       atomic_read(&cmd->immed_queue_count));
+       }
+}
+
+/*
+ * Queue @cmd on the connection's response queue in @state and wake the
+ * TX thread.  Allocation failure is logged and silently dropped.
+ */
+void iscsit_add_cmd_to_response_queue(
+       struct iscsi_cmd *cmd,
+       struct iscsi_conn *conn,
+       u8 state)
+{
+       struct iscsi_queue_req *qr;
+
+       /* GFP_ATOMIC: may be called from non-sleeping context. */
+       qr = kmem_cache_zalloc(lio_qr_cache, GFP_ATOMIC);
+       if (!qr) {
+               pr_err("Unable to allocate memory for"
+                       " struct iscsi_queue_req\n");
+               return;
+       }
+       INIT_LIST_HEAD(&qr->qr_list);
+       qr->cmd = cmd;
+       qr->state = state;
+
+       spin_lock_bh(&conn->response_queue_lock);
+       list_add_tail(&qr->qr_list, &conn->response_queue_list);
+       atomic_inc(&cmd->response_queue_count);
+       spin_unlock_bh(&conn->response_queue_lock);
+
+       wake_up_process(conn->thread_set->tx_thread);
+}
+
+/*
+ * Dequeue the head of the connection's response queue.
+ * Returns the queue request (caller frees it to lio_qr_cache), or NULL
+ * when the queue is empty.
+ */
+struct iscsi_queue_req *iscsit_get_cmd_from_response_queue(struct iscsi_conn *conn)
+{
+       struct iscsi_queue_req *qr;
+
+       spin_lock_bh(&conn->response_queue_lock);
+       if (list_empty(&conn->response_queue_list)) {
+               spin_unlock_bh(&conn->response_queue_lock);
+               return NULL;
+       }
+       /*
+        * Use list_first_entry() rather than the original
+        * list_for_each_entry() + immediate break to grab the head.
+        */
+       qr = list_first_entry(&conn->response_queue_list,
+                               struct iscsi_queue_req, qr_list);
+
+       list_del(&qr->qr_list);
+       if (qr->cmd)
+               atomic_dec(&qr->cmd->response_queue_count);
+       spin_unlock_bh(&conn->response_queue_lock);
+
+       return qr;
+}
+
+/*
+ * Remove every response-queue entry that references @cmd, freeing each
+ * entry back to lio_qr_cache.  A non-zero residual count afterwards is
+ * logged as it indicates queue-accounting breakage.
+ */
+static void iscsit_remove_cmd_from_response_queue(
+       struct iscsi_cmd *cmd,
+       struct iscsi_conn *conn)
+{
+       struct iscsi_queue_req *qr, *qr_tmp;
+
+       spin_lock_bh(&conn->response_queue_lock);
+       /* Fast path: nothing queued for this command. */
+       if (!atomic_read(&cmd->response_queue_count)) {
+               spin_unlock_bh(&conn->response_queue_lock);
+               return;
+       }
+
+       list_for_each_entry_safe(qr, qr_tmp, &conn->response_queue_list,
+                               qr_list) {
+               if (qr->cmd != cmd)
+                       continue;
+
+               atomic_dec(&qr->cmd->response_queue_count);
+               list_del(&qr->qr_list);
+               kmem_cache_free(lio_qr_cache, qr);
+       }
+       spin_unlock_bh(&conn->response_queue_lock);
+
+       if (atomic_read(&cmd->response_queue_count)) {
+               pr_err("ITT: 0x%08x response_queue_count: %d\n",
+                       cmd->init_task_tag,
+                       atomic_read(&cmd->response_queue_count));
+       }
+}
+
+/*
+ * Drain and free every outstanding entry on both the immediate and
+ * response queues of @conn, fixing up each command's queue counters.
+ * Used during connection teardown.
+ */
+void iscsit_free_queue_reqs_for_conn(struct iscsi_conn *conn)
+{
+       struct iscsi_queue_req *qr, *qr_tmp;
+
+       spin_lock_bh(&conn->immed_queue_lock);
+       list_for_each_entry_safe(qr, qr_tmp, &conn->immed_queue_list, qr_list) {
+               list_del(&qr->qr_list);
+               if (qr->cmd)
+                       atomic_dec(&qr->cmd->immed_queue_count);
+
+               kmem_cache_free(lio_qr_cache, qr);
+       }
+       spin_unlock_bh(&conn->immed_queue_lock);
+
+       spin_lock_bh(&conn->response_queue_lock);
+       list_for_each_entry_safe(qr, qr_tmp, &conn->response_queue_list,
+                       qr_list) {
+               list_del(&qr->qr_list);
+               if (qr->cmd)
+                       atomic_dec(&qr->cmd->response_queue_count);
+
+               kmem_cache_free(lio_qr_cache, qr);
+       }
+       spin_unlock_bh(&conn->response_queue_lock);
+}
+
+/*
+ * Final teardown of an iscsi_cmd: free R2Ts, DataIN requests, all
+ * per-command allocations (kfree(NULL) is a no-op for the unset ones),
+ * the memory-segment pages, any queue entries still referencing it, and
+ * finally the command itself back to lio_cmd_cache.
+ */
+void iscsit_release_cmd(struct iscsi_cmd *cmd)
+{
+       struct iscsi_conn *conn = cmd->conn;
+       int i;
+
+       iscsit_free_r2ts_from_list(cmd);
+       iscsit_free_all_datain_reqs(cmd);
+
+       kfree(cmd->buf_ptr);
+       kfree(cmd->pdu_list);
+       kfree(cmd->seq_list);
+       kfree(cmd->tmr_req);
+       kfree(cmd->iov_data);
+
+       for (i = 0; i < cmd->t_mem_sg_nents; i++)
+               __free_page(sg_page(&cmd->t_mem_sg[i]));
+
+       kfree(cmd->t_mem_sg);
+
+       /* conn may already be NULL for commands released during login. */
+       if (conn) {
+               iscsit_remove_cmd_from_immediate_queue(cmd, conn);
+               iscsit_remove_cmd_from_response_queue(cmd, conn);
+       }
+
+       kmem_cache_free(lio_cmd_cache, cmd);
+}
+
+/*
+ *     Wait for all outstanding session usage references to be dropped.
+ *
+ *     Returns 0 if the usage count was already zero, 1 after sleeping
+ *     until iscsit_dec_session_usage_count() completed the waiter, or 2
+ *     when called from interrupt context (cannot sleep; the caller must
+ *     retry from process context).
+ */
+int iscsit_check_session_usage_count(struct iscsi_session *sess)
+{
+       spin_lock_bh(&sess->session_usage_lock);
+       if (sess->session_usage_count != 0) {
+               sess->session_waiting_on_uc = 1;
+               spin_unlock_bh(&sess->session_usage_lock);
+               /* Sleeping is not allowed here; defer to the caller. */
+               if (in_interrupt())
+                       return 2;
+
+               wait_for_completion(&sess->session_waiting_on_uc_comp);
+               return 1;
+       }
+       spin_unlock_bh(&sess->session_usage_lock);
+
+       return 0;
+}
+
+/*
+ *     Drop one session usage reference; once the count hits zero, wake
+ *     any waiter blocked in iscsit_check_session_usage_count().
+ */
+void iscsit_dec_session_usage_count(struct iscsi_session *sess)
+{
+       spin_lock_bh(&sess->session_usage_lock);
+       if (!--sess->session_usage_count && sess->session_waiting_on_uc)
+               complete(&sess->session_waiting_on_uc_comp);
+       spin_unlock_bh(&sess->session_usage_lock);
+}
+
+/*
+ *     Take one session usage reference under session_usage_lock.
+ */
+void iscsit_inc_session_usage_count(struct iscsi_session *sess)
+{
+       spin_lock_bh(&sess->session_usage_lock);
+       ++sess->session_usage_count;
+       spin_unlock_bh(&sess->session_usage_lock);
+}
+
+/*
+ *     Used before iscsi_do[rx,tx]_data() to determine iov and [rx,tx]_marker
+ *     array counts needed for sync and steering.
+ *
+ *     For every marker interval that fits inside the payload, three extra
+ *     kvec slots (the payload split plus two 32-bit marker halves) and two
+ *     marker-value slots are reserved on top of the caller's iov_count.
+ */
+static int iscsit_determine_sync_and_steering_counts(
+       struct iscsi_conn *conn,
+       struct iscsi_data_count *count)
+{
+       u32 length = count->data_length;
+       u32 marker, markint;
+
+       count->sync_and_steering = 1;
+
+       /*
+        * 'marker' is the byte distance to the next marker for this
+        * direction; [OF,IF]MarkInt are negotiated in 32-bit words
+        * (RFC 3720), hence the * 4 to convert to bytes.
+        */
+       marker = (count->type == ISCSI_RX_DATA) ?
+                       conn->of_marker : conn->if_marker;
+       markint = (count->type == ISCSI_RX_DATA) ?
+                       (conn->conn_ops->OFMarkInt * 4) :
+                       (conn->conn_ops->IFMarkInt * 4);
+       count->ss_iov_count = count->iov_count;
+
+       while (length > 0) {
+               if (length >= marker) {
+                       count->ss_iov_count += 3;
+                       count->ss_marker_count += 2;
+
+                       length -= marker;
+                       marker = markint;
+               } else
+                       length = 0;
+       }
+
+       return 0;
+}
+
+/*
+ *     Setup conn->if_marker and conn->of_marker values based upon
+ *     the initial marker-less interval. (see iSCSI v19 A.2)
+ *
+ *     The login phase is exchanged without markers, so the distance to
+ *     the first marker in full feature phase must be computed from how
+ *     many bytes (and how many would-be marker positions) the login
+ *     traffic already consumed.
+ */
+int iscsit_set_sync_and_steering_values(struct iscsi_conn *conn)
+{
+       int login_ifmarker_count = 0, login_ofmarker_count = 0, next_marker = 0;
+       /*
+        * IFMarkInt and OFMarkInt are negotiated as 32-bit words.
+        */
+       u32 IFMarkInt = (conn->conn_ops->IFMarkInt * 4);
+       u32 OFMarkInt = (conn->conn_ops->OFMarkInt * 4);
+
+       if (conn->conn_ops->OFMarker) {
+               /*
+                * Account for the first Login Command received not
+                * via iscsi_recv_msg().
+                */
+               conn->of_marker += ISCSI_HDR_LEN;
+               if (conn->of_marker <= OFMarkInt) {
+                       conn->of_marker = (OFMarkInt - conn->of_marker);
+               } else {
+                       /*
+                        * Login traffic spanned one or more marker
+                        * intervals; advance past them, accounting for the
+                        * MARKER_SIZE bytes each marker would have added.
+                        */
+                       login_ofmarker_count = (conn->of_marker / OFMarkInt);
+                       next_marker = (OFMarkInt * (login_ofmarker_count + 1)) +
+                                       (login_ofmarker_count * MARKER_SIZE);
+                       conn->of_marker = (next_marker - conn->of_marker);
+               }
+               conn->of_marker_offset = 0;
+               pr_debug("Setting OFMarker value to %u based on Initial"
+                       " Markerless Interval.\n", conn->of_marker);
+       }
+
+       if (conn->conn_ops->IFMarker) {
+               if (conn->if_marker <= IFMarkInt) {
+                       conn->if_marker = (IFMarkInt - conn->if_marker);
+               } else {
+                       login_ifmarker_count = (conn->if_marker / IFMarkInt);
+                       next_marker = (IFMarkInt * (login_ifmarker_count + 1)) +
+                                       (login_ifmarker_count * MARKER_SIZE);
+                       conn->if_marker = (next_marker - conn->if_marker);
+               }
+               pr_debug("Setting IFMarker value to %u based on Initial"
+                       " Markerless Interval.\n", conn->if_marker);
+       }
+
+       return 0;
+}
+
+/*
+ *     Look up a logged-in connection by CID, taking a usage reference on
+ *     the match before dropping sess->conn_lock.  Returns NULL when no
+ *     logged-in connection with that CID exists.
+ */
+struct iscsi_conn *iscsit_get_conn_from_cid(struct iscsi_session *sess, u16 cid)
+{
+       struct iscsi_conn *c, *found = NULL;
+
+       spin_lock_bh(&sess->conn_lock);
+       list_for_each_entry(c, &sess->sess_conn_list, conn_list) {
+               if (c->cid != cid ||
+                   c->conn_state != TARG_CONN_STATE_LOGGED_IN)
+                       continue;
+
+               iscsit_inc_conn_usage_count(c);
+               found = c;
+               break;
+       }
+       spin_unlock_bh(&sess->conn_lock);
+
+       return found;
+}
+
+/*
+ *     Connection-recovery variant of iscsit_get_conn_from_cid(): matches
+ *     any connection state, takes a usage reference, and flags the
+ *     connection as waiting for reinstatement cleanup before returning.
+ */
+struct iscsi_conn *iscsit_get_conn_from_cid_rcfr(struct iscsi_session *sess, u16 cid)
+{
+       struct iscsi_conn *c, *found = NULL;
+
+       spin_lock_bh(&sess->conn_lock);
+       list_for_each_entry(c, &sess->sess_conn_list, conn_list) {
+               if (c->cid != cid)
+                       continue;
+
+               iscsit_inc_conn_usage_count(c);
+               spin_lock(&c->state_lock);
+               atomic_set(&c->connection_wait_rcfr, 1);
+               spin_unlock(&c->state_lock);
+               found = c;
+               break;
+       }
+       spin_unlock_bh(&sess->conn_lock);
+
+       return found;
+}
+
+/*
+ *     Block until all outstanding connection usage references have been
+ *     dropped.  Unlike the session variant, this always sleeps when the
+ *     count is non-zero, so it must be called from process context only.
+ */
+void iscsit_check_conn_usage_count(struct iscsi_conn *conn)
+{
+       spin_lock_bh(&conn->conn_usage_lock);
+       if (conn->conn_usage_count != 0) {
+               conn->conn_waiting_on_uc = 1;
+               spin_unlock_bh(&conn->conn_usage_lock);
+
+               wait_for_completion(&conn->conn_waiting_on_uc_comp);
+               return;
+       }
+       spin_unlock_bh(&conn->conn_usage_lock);
+}
+
+/*
+ *     Drop one connection usage reference; once the count hits zero,
+ *     wake any waiter blocked in iscsit_check_conn_usage_count().
+ */
+void iscsit_dec_conn_usage_count(struct iscsi_conn *conn)
+{
+       spin_lock_bh(&conn->conn_usage_lock);
+       if (!--conn->conn_usage_count && conn->conn_waiting_on_uc)
+               complete(&conn->conn_waiting_on_uc_comp);
+       spin_unlock_bh(&conn->conn_usage_lock);
+}
+
+/*
+ *     Take one connection usage reference under conn_usage_lock.
+ */
+void iscsit_inc_conn_usage_count(struct iscsi_conn *conn)
+{
+       spin_lock_bh(&conn->conn_usage_lock);
+       ++conn->conn_usage_count;
+       spin_unlock_bh(&conn->conn_usage_lock);
+}
+
+/*
+ *     Allocate and queue an unsolicited NopIN PDU on the connection's
+ *     immediate queue.  When @want_response is set, a real Target
+ *     Transfer Tag is assigned (skipping the reserved 0xFFFFFFFF value)
+ *     and the NOPIN response timer is started so that a missing NopOut
+ *     reply eventually fails the connection.
+ *
+ *     Returns 0 on success, -1 if command allocation fails.
+ */
+static int iscsit_add_nopin(struct iscsi_conn *conn, int want_response)
+{
+       u8 state;
+       struct iscsi_cmd *cmd;
+
+       cmd = iscsit_allocate_cmd(conn, GFP_ATOMIC);
+       if (!cmd)
+               return -1;
+
+       cmd->iscsi_opcode = ISCSI_OP_NOOP_IN;
+       state = (want_response) ? ISTATE_SEND_NOPIN_WANT_RESPONSE :
+                               ISTATE_SEND_NOPIN_NO_RESPONSE;
+       /* Unsolicited NopIN carries the reserved ITT (RFC 3720 10.19). */
+       cmd->init_task_tag = 0xFFFFFFFF;
+       spin_lock_bh(&conn->sess->ttt_lock);
+       cmd->targ_xfer_tag = (want_response) ? conn->sess->targ_xfer_tag++ :
+                       0xFFFFFFFF;
+       /* 0xFFFFFFFF is reserved; skip it if the TTT counter wrapped. */
+       if (want_response && (cmd->targ_xfer_tag == 0xFFFFFFFF))
+               cmd->targ_xfer_tag = conn->sess->targ_xfer_tag++;
+       spin_unlock_bh(&conn->sess->ttt_lock);
+
+       spin_lock_bh(&conn->cmd_lock);
+       list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+       spin_unlock_bh(&conn->cmd_lock);
+
+       if (want_response)
+               iscsit_start_nopin_response_timer(conn);
+       iscsit_add_cmd_to_immediate_queue(cmd, conn, state);
+
+       return 0;
+}
+
+/*
+ *     NOPIN response timer expiry: the initiator failed to answer our
+ *     NopIN in time, so record the timeout in the TIQN session error
+ *     stats and force connection reinstatement.  A conn usage reference
+ *     is held across the handler to keep the connection alive.
+ */
+static void iscsit_handle_nopin_response_timeout(unsigned long data)
+{
+       struct iscsi_conn *conn = (struct iscsi_conn *) data;
+       struct iscsi_portal_group *tpg;
+       struct iscsi_tiqn *tiqn;
+
+       iscsit_inc_conn_usage_count(conn);
+
+       spin_lock_bh(&conn->nopin_timer_lock);
+       if (conn->nopin_response_timer_flags & ISCSI_TF_STOP) {
+               spin_unlock_bh(&conn->nopin_timer_lock);
+               iscsit_dec_conn_usage_count(conn);
+               return;
+       }
+
+       pr_debug("Did not receive response to NOPIN on CID: %hu on"
+               " SID: %u, failing connection.\n", conn->cid,
+                       conn->sess->sid);
+       conn->nopin_response_timer_flags &= ~ISCSI_TF_RUNNING;
+       spin_unlock_bh(&conn->nopin_timer_lock);
+
+       tpg = conn->sess->tpg;
+       tiqn = tpg->tpg_tiqn;
+       if (tiqn) {
+               spin_lock_bh(&tiqn->sess_err_stats.lock);
+               strcpy(tiqn->sess_err_stats.last_sess_fail_rem_name,
+                               (void *)conn->sess->sess_ops->InitiatorName);
+               tiqn->sess_err_stats.last_sess_failure_type =
+                               ISCSI_SESS_ERR_CXN_TIMEOUT;
+               tiqn->sess_err_stats.cxn_timeout_errors++;
+               conn->sess->conn_timeout_errors++;
+               spin_unlock_bh(&tiqn->sess_err_stats.lock);
+       }
+
+       iscsit_cause_connection_reinstatement(conn, 0);
+       iscsit_dec_conn_usage_count(conn);
+}
+
+/*
+ *     Push the NOPIN response timer's expiry forward by the node's
+ *     nopin_response_timeout, but only if the timer is actually running.
+ */
+void iscsit_mod_nopin_response_timer(struct iscsi_conn *conn)
+{
+       struct iscsi_session *sess = conn->sess;
+       struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+
+       spin_lock_bh(&conn->nopin_timer_lock);
+       if (!(conn->nopin_response_timer_flags & ISCSI_TF_RUNNING)) {
+               spin_unlock_bh(&conn->nopin_timer_lock);
+               return;
+       }
+
+       /*
+        * Timer expiry values are native unsigned long jiffies; use
+        * jiffies directly instead of the u64 get_jiffies_64() read
+        * that was implicitly truncated by mod_timer() anyway.
+        */
+       mod_timer(&conn->nopin_response_timer,
+               jiffies + na->nopin_response_timeout * HZ);
+       spin_unlock_bh(&conn->nopin_timer_lock);
+}
+
+/*
+ *     Arm the NOPIN response timer for a connection.  Takes and releases
+ *     conn->nopin_timer_lock itself, so callers must NOT hold it (the
+ *     previous comment claiming the opposite was wrong - the function
+ *     has always acquired the lock internally).
+ */
+void iscsit_start_nopin_response_timer(struct iscsi_conn *conn)
+{
+       struct iscsi_session *sess = conn->sess;
+       struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+
+       spin_lock_bh(&conn->nopin_timer_lock);
+       if (conn->nopin_response_timer_flags & ISCSI_TF_RUNNING) {
+               spin_unlock_bh(&conn->nopin_timer_lock);
+               return;
+       }
+
+       setup_timer(&conn->nopin_response_timer,
+               iscsit_handle_nopin_response_timeout, (unsigned long)conn);
+       /* Timer expiry is unsigned long jiffies, not 64-bit jiffies. */
+       conn->nopin_response_timer.expires =
+               jiffies + na->nopin_response_timeout * HZ;
+       conn->nopin_response_timer_flags &= ~ISCSI_TF_STOP;
+       conn->nopin_response_timer_flags |= ISCSI_TF_RUNNING;
+       add_timer(&conn->nopin_response_timer);
+
+       pr_debug("Started NOPIN Response Timer on CID: %d to %u"
+               " seconds\n", conn->cid, na->nopin_response_timeout);
+       spin_unlock_bh(&conn->nopin_timer_lock);
+}
+
+/*
+ *     Stop the NOPIN response timer, synchronizing against a concurrently
+ *     executing handler.  ISCSI_TF_STOP is set under the lock first so a
+ *     handler that has already fired bails out instead of failing the
+ *     connection while we wait in del_timer_sync().
+ */
+void iscsit_stop_nopin_response_timer(struct iscsi_conn *conn)
+{
+       spin_lock_bh(&conn->nopin_timer_lock);
+       if (!(conn->nopin_response_timer_flags & ISCSI_TF_RUNNING)) {
+               spin_unlock_bh(&conn->nopin_timer_lock);
+               return;
+       }
+       conn->nopin_response_timer_flags |= ISCSI_TF_STOP;
+       spin_unlock_bh(&conn->nopin_timer_lock);
+
+       /* Must not hold nopin_timer_lock here: the handler takes it. */
+       del_timer_sync(&conn->nopin_response_timer);
+
+       spin_lock_bh(&conn->nopin_timer_lock);
+       conn->nopin_response_timer_flags &= ~ISCSI_TF_RUNNING;
+       spin_unlock_bh(&conn->nopin_timer_lock);
+}
+
+/*
+ *     NOPIN interval timer expiry: queue a new NopIN that requires a
+ *     response, unless the timer is being torn down.  A conn usage
+ *     reference is held across the handler to keep the connection alive.
+ */
+static void iscsit_handle_nopin_timeout(unsigned long data)
+{
+       struct iscsi_conn *conn = (struct iscsi_conn *) data;
+
+       iscsit_inc_conn_usage_count(conn);
+
+       spin_lock_bh(&conn->nopin_timer_lock);
+       if (conn->nopin_timer_flags & ISCSI_TF_STOP) {
+               spin_unlock_bh(&conn->nopin_timer_lock);
+               iscsit_dec_conn_usage_count(conn);
+               return;
+       }
+       conn->nopin_timer_flags &= ~ISCSI_TF_RUNNING;
+       spin_unlock_bh(&conn->nopin_timer_lock);
+
+       /* want_response = 1: this NopIN also restarts the response timer. */
+       iscsit_add_nopin(conn, 1);
+       iscsit_dec_conn_usage_count(conn);
+}
+
+/*
+ * Called with conn->nopin_timer_lock held.
+ */
+void __iscsit_start_nopin_timer(struct iscsi_conn *conn)
+{
+       struct iscsi_session *sess = conn->sess;
+       struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+       /*
+        * NOPIN timeout is disabled.
+        */
+       if (!na->nopin_timeout)
+               return;
+
+       if (conn->nopin_timer_flags & ISCSI_TF_RUNNING)
+               return;
+
+       setup_timer(&conn->nopin_timer, iscsit_handle_nopin_timeout,
+               (unsigned long)conn);
+       /* Timer expiry is unsigned long jiffies, not 64-bit jiffies. */
+       conn->nopin_timer.expires = jiffies + na->nopin_timeout * HZ;
+       conn->nopin_timer_flags &= ~ISCSI_TF_STOP;
+       conn->nopin_timer_flags |= ISCSI_TF_RUNNING;
+       add_timer(&conn->nopin_timer);
+
+       pr_debug("Started NOPIN Timer on CID: %d at %u second"
+               " interval\n", conn->cid, na->nopin_timeout);
+}
+
+/*
+ *     Locked wrapper around __iscsit_start_nopin_timer().  The previous
+ *     body duplicated __iscsit_start_nopin_timer() line for line;
+ *     delegating removes the copy so the two can never drift apart.
+ */
+void iscsit_start_nopin_timer(struct iscsi_conn *conn)
+{
+       spin_lock_bh(&conn->nopin_timer_lock);
+       __iscsit_start_nopin_timer(conn);
+       spin_unlock_bh(&conn->nopin_timer_lock);
+}
+
+/*
+ *     Stop the NOPIN interval timer using the same STOP-flag handshake
+ *     as iscsit_stop_nopin_response_timer(): set ISCSI_TF_STOP under the
+ *     lock so an already-fired handler exits early, then synchronously
+ *     kill the timer and clear the RUNNING flag.
+ */
+void iscsit_stop_nopin_timer(struct iscsi_conn *conn)
+{
+       spin_lock_bh(&conn->nopin_timer_lock);
+       if (!(conn->nopin_timer_flags & ISCSI_TF_RUNNING)) {
+               spin_unlock_bh(&conn->nopin_timer_lock);
+               return;
+       }
+       conn->nopin_timer_flags |= ISCSI_TF_STOP;
+       spin_unlock_bh(&conn->nopin_timer_lock);
+
+       /* Must not hold nopin_timer_lock here: the handler takes it. */
+       del_timer_sync(&conn->nopin_timer);
+
+       spin_lock_bh(&conn->nopin_timer_lock);
+       conn->nopin_timer_flags &= ~ISCSI_TF_RUNNING;
+       spin_unlock_bh(&conn->nopin_timer_lock);
+}
+
+/*
+ *     Transmit cmd->tx_size bytes using either the command's data iovecs
+ *     (use_misc == 0) or its miscellaneous iovecs.  The whole transfer is
+ *     retried from the top on -EAGAIN; any other short or failed transfer
+ *     returns -1.  On success cmd->tx_size is reset to zero.
+ */
+int iscsit_send_tx_data(
+       struct iscsi_cmd *cmd,
+       struct iscsi_conn *conn,
+       int use_misc)
+{
+       int tx_sent, tx_size;
+       u32 iov_count;
+       struct kvec *iov;
+
+send_data:
+       tx_size = cmd->tx_size;
+
+       if (!use_misc) {
+               iov = &cmd->iov_data[0];
+               iov_count = cmd->iov_data_count;
+       } else {
+               iov = &cmd->iov_misc[0];
+               iov_count = cmd->iov_misc_count;
+       }
+
+       tx_sent = tx_data(conn, &iov[0], iov_count, tx_size);
+       if (tx_size != tx_sent) {
+               if (tx_sent == -EAGAIN) {
+                       pr_err("tx_data() returned -EAGAIN\n");
+                       goto send_data;
+               } else
+                       return -1;
+       }
+       cmd->tx_size = 0;
+
+       return 0;
+}
+
+/*
+ *     Zero-copy transmit path for a DataIN PDU: send the iSCSI header
+ *     (plus header digest) via tx_data(), then sendpage() each page of
+ *     the command's scatterlist payload, and finally any pad bytes and
+ *     the data digest from the tail of cmd->iov_data.
+ *
+ *     Each stage retries on -EAGAIN; any other short transfer fails the
+ *     call with -1.
+ *
+ *     NOTE(review): assumes the caller laid out pad at iov_data[count-1]
+ *     and the CRC kvec at iov_data[count] - confirm against the DataIN
+ *     build path.
+ */
+int iscsit_fe_sendpage_sg(
+       struct iscsi_cmd *cmd,
+       struct iscsi_conn *conn)
+{
+       struct scatterlist *sg = cmd->first_data_sg;
+       struct kvec iov;
+       u32 tx_hdr_size, data_len;
+       u32 offset = cmd->first_data_sg_off;
+       int tx_sent;
+
+send_hdr:
+       tx_hdr_size = ISCSI_HDR_LEN;
+       if (conn->conn_ops->HeaderDigest)
+               tx_hdr_size += ISCSI_CRC_LEN;
+
+       iov.iov_base = cmd->pdu;
+       iov.iov_len = tx_hdr_size;
+
+       tx_sent = tx_data(conn, &iov, 1, tx_hdr_size);
+       if (tx_hdr_size != tx_sent) {
+               if (tx_sent == -EAGAIN) {
+                       pr_err("tx_data() returned -EAGAIN\n");
+                       goto send_hdr;
+               }
+               return -1;
+       }
+
+       /* Payload length = total PDU size minus header, pad and digest. */
+       data_len = cmd->tx_size - tx_hdr_size - cmd->padding;
+       if (conn->conn_ops->DataDigest)
+               data_len -= ISCSI_CRC_LEN;
+
+       /*
+        * Perform sendpage() for each page in the scatterlist
+        */
+       while (data_len) {
+               u32 space = (sg->length - offset);
+               u32 sub_len = min_t(u32, data_len, space);
+send_pg:
+               tx_sent = conn->sock->ops->sendpage(conn->sock,
+                                       sg_page(sg), sg->offset + offset, sub_len, 0);
+               if (tx_sent != sub_len) {
+                       if (tx_sent == -EAGAIN) {
+                               pr_err("tcp_sendpage() returned"
+                                               " -EAGAIN\n");
+                               goto send_pg;
+                       }
+
+                       pr_err("tcp_sendpage() failure: %d\n",
+                                       tx_sent);
+                       return -1;
+               }
+
+               /* Only the first sg entry may start at a non-zero offset. */
+               data_len -= sub_len;
+               offset = 0;
+               sg = sg_next(sg);
+       }
+
+send_padding:
+       if (cmd->padding) {
+               struct kvec *iov_p =
+                       &cmd->iov_data[cmd->iov_data_count-1];
+
+               tx_sent = tx_data(conn, iov_p, 1, cmd->padding);
+               if (cmd->padding != tx_sent) {
+                       if (tx_sent == -EAGAIN) {
+                               pr_err("tx_data() returned -EAGAIN\n");
+                               goto send_padding;
+                       }
+                       return -1;
+               }
+       }
+
+send_datacrc:
+       if (conn->conn_ops->DataDigest) {
+               struct kvec *iov_d =
+                       &cmd->iov_data[cmd->iov_data_count];
+
+               tx_sent = tx_data(conn, iov_d, 1, ISCSI_CRC_LEN);
+               if (ISCSI_CRC_LEN != tx_sent) {
+                       if (tx_sent == -EAGAIN) {
+                               pr_err("tx_data() returned -EAGAIN\n");
+                               goto send_datacrc;
+                       }
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ *      This function is used for mainly sending a ISCSI_TARG_LOGIN_RSP PDU
+ *      back to the Initiator when an exception condition occurs with the
+ *      errors set in status_class and status_detail.
+ *
+ *      Parameters:     iSCSI Connection, Status Class, Status Detail.
+ *      Returns:        0 on success, -1 on error.
+ */
+int iscsit_tx_login_rsp(struct iscsi_conn *conn, u8 status_class, u8 status_detail)
+{
+       u8 iscsi_hdr[ISCSI_HDR_LEN];
+       int err;
+       struct kvec iov;
+       struct iscsi_login_rsp *hdr;
+
+       /* Record the failure/redirect in the TIQN login statistics. */
+       iscsit_collect_login_stats(conn, status_class, status_detail);
+
+       memset(&iov, 0, sizeof(struct kvec));
+       memset(&iscsi_hdr, 0x0, ISCSI_HDR_LEN);
+
+       /* Build a bare Login Response header; no data segment follows. */
+       hdr     = (struct iscsi_login_rsp *)&iscsi_hdr;
+       hdr->opcode             = ISCSI_OP_LOGIN_RSP;
+       hdr->status_class       = status_class;
+       hdr->status_detail      = status_detail;
+       hdr->itt                = cpu_to_be32(conn->login_itt);
+
+       iov.iov_base            = &iscsi_hdr;
+       iov.iov_len             = ISCSI_HDR_LEN;
+
+       PRINT_BUFF(iscsi_hdr, ISCSI_HDR_LEN);
+
+       err = tx_data(conn, &iov, 1, ISCSI_HDR_LEN);
+       if (err != ISCSI_HDR_LEN) {
+               pr_err("tx_data returned less than expected\n");
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ *     Dump the negotiated per-connection and per-session parameters for
+ *     a session to the kernel log via pr_debug().
+ */
+void iscsit_print_session_params(struct iscsi_session *sess)
+{
+       struct iscsi_conn *c;
+
+       pr_debug("-----------------------------[Session Params for"
+               " SID: %u]-----------------------------\n", sess->sid);
+
+       spin_lock_bh(&sess->conn_lock);
+       list_for_each_entry(c, &sess->sess_conn_list, conn_list)
+               iscsi_dump_conn_ops(c->conn_ops);
+       spin_unlock_bh(&sess->conn_lock);
+
+       iscsi_dump_sess_ops(sess->sess_ops);
+}
+
+/*
+ *     Low-level socket receive.  When sync-and-steering is active, the
+ *     caller's iovec list is rewritten into an on-stack kvec array with
+ *     two extra 32-bit slots interposed at every OFMarker interval, so
+ *     the inbound marker values land in rx_marker_val[] instead of the
+ *     payload buffers.  The marker bytes are added to the expected
+ *     receive length up front and subtracted from the returned count.
+ *
+ *     Returns total payload bytes received, or <= 0 on socket error.
+ *
+ *     NOTE(review): iov[] and rx_marker_val[] are variable-length arrays
+ *     sized by ss_iov_count/ss_marker_count, which grow with the
+ *     transfer length - worth confirming the kernel-stack upper bound.
+ */
+static int iscsit_do_rx_data(
+       struct iscsi_conn *conn,
+       struct iscsi_data_count *count)
+{
+       int data = count->data_length, rx_loop = 0, total_rx = 0, iov_len;
+       u32 rx_marker_val[count->ss_marker_count], rx_marker_iov = 0;
+       struct kvec iov[count->ss_iov_count], *iov_p;
+       struct msghdr msg;
+
+       if (!conn || !conn->sock || !conn->conn_ops)
+               return -1;
+
+       memset(&msg, 0, sizeof(struct msghdr));
+
+       if (count->sync_and_steering) {
+               int size = 0;
+               u32 i, orig_iov_count = 0;
+               u32 orig_iov_len = 0, orig_iov_loc = 0;
+               u32 iov_count = 0, per_iov_bytes = 0;
+               u32 *rx_marker, old_rx_marker = 0;
+               struct kvec *iov_record;
+
+               memset(&rx_marker_val, 0,
+                               count->ss_marker_count * sizeof(u32));
+               memset(&iov, 0, count->ss_iov_count * sizeof(struct kvec));
+
+               iov_record = count->iov;
+               orig_iov_count = count->iov_count;
+               rx_marker = &conn->of_marker;
+
+               i = 0;
+               size = data;
+               orig_iov_len = iov_record[orig_iov_loc].iov_len;
+               while (size > 0) {
+                       pr_debug("rx_data: #1 orig_iov_len %u,"
+                       " orig_iov_loc %u\n", orig_iov_len, orig_iov_loc);
+                       pr_debug("rx_data: #2 rx_marker %u, size"
+                               " %u\n", *rx_marker, size);
+
+                       /*
+                        * A marker falls inside the current iovec: split it
+                        * at the marker and redirect the two 32-bit marker
+                        * halves into rx_marker_val[].
+                        */
+                       if (orig_iov_len >= *rx_marker) {
+                               iov[iov_count].iov_len = *rx_marker;
+                               iov[iov_count++].iov_base =
+                                       (iov_record[orig_iov_loc].iov_base +
+                                               per_iov_bytes);
+
+                               iov[iov_count].iov_len = (MARKER_SIZE / 2);
+                               iov[iov_count++].iov_base =
+                                       &rx_marker_val[rx_marker_iov++];
+                               iov[iov_count].iov_len = (MARKER_SIZE / 2);
+                               iov[iov_count++].iov_base =
+                                       &rx_marker_val[rx_marker_iov++];
+                               old_rx_marker = *rx_marker;
+
+                               /*
+                                * OFMarkInt is in 32-bit words.
+                                */
+                               *rx_marker = (conn->conn_ops->OFMarkInt * 4);
+                               size -= old_rx_marker;
+                               orig_iov_len -= old_rx_marker;
+                               per_iov_bytes += old_rx_marker;
+
+                               pr_debug("rx_data: #3 new_rx_marker"
+                                       " %u, size %u\n", *rx_marker, size);
+                       } else {
+                               /* No marker in this iovec; copy it whole. */
+                               iov[iov_count].iov_len = orig_iov_len;
+                               iov[iov_count++].iov_base =
+                                       (iov_record[orig_iov_loc].iov_base +
+                                               per_iov_bytes);
+
+                               per_iov_bytes = 0;
+                               *rx_marker -= orig_iov_len;
+                               size -= orig_iov_len;
+
+                               if (size)
+                                       orig_iov_len =
+                                       iov_record[++orig_iov_loc].iov_len;
+
+                               pr_debug("rx_data: #4 new_rx_marker"
+                                       " %u, size %u\n", *rx_marker, size);
+                       }
+               }
+               /* Expect the marker bytes on the wire as well. */
+               data += (rx_marker_iov * (MARKER_SIZE / 2));
+
+               iov_p   = &iov[0];
+               iov_len = iov_count;
+
+               if (iov_count > count->ss_iov_count) {
+                       pr_err("iov_count: %d, count->ss_iov_count:"
+                               " %d\n", iov_count, count->ss_iov_count);
+                       return -1;
+               }
+               if (rx_marker_iov > count->ss_marker_count) {
+                       pr_err("rx_marker_iov: %d, count->ss_marker"
+                               "_count: %d\n", rx_marker_iov,
+                               count->ss_marker_count);
+                       return -1;
+               }
+       } else {
+               iov_p = count->iov;
+               iov_len = count->iov_count;
+       }
+
+       while (total_rx < data) {
+               rx_loop = kernel_recvmsg(conn->sock, &msg, iov_p, iov_len,
+                                       (data - total_rx), MSG_WAITALL);
+               if (rx_loop <= 0) {
+                       pr_debug("rx_loop: %d total_rx: %d\n",
+                               rx_loop, total_rx);
+                       return rx_loop;
+               }
+               total_rx += rx_loop;
+               pr_debug("rx_loop: %d, total_rx: %d, data: %d\n",
+                               rx_loop, total_rx, data);
+       }
+
+       if (count->sync_and_steering) {
+               int j;
+               /* The last received marker value becomes the new offset. */
+               for (j = 0; j < rx_marker_iov; j++) {
+                       pr_debug("rx_data: #5 j: %d, offset: %d\n",
+                               j, rx_marker_val[j]);
+                       conn->of_marker_offset = rx_marker_val[j];
+               }
+               /* Report payload bytes only, excluding marker bytes. */
+               total_rx -= (rx_marker_iov * (MARKER_SIZE / 2));
+       }
+
+       return total_rx;
+}
+
+/*
+ *     Low-level socket send, the transmit counterpart of
+ *     iscsit_do_rx_data().  With sync-and-steering active, the outgoing
+ *     stream is split at every IFMarkInt interval and a marker (two
+ *     32-bit halves from tx_marker_val[], each holding the offset to the
+ *     next iSCSI PDU header) is interleaved into the kvec array.
+ *
+ *     Returns total payload bytes sent (markers excluded), or <= 0 on
+ *     socket error.
+ *
+ *     NOTE(review): iov[] and tx_marker_val[] are variable-length arrays
+ *     sized by ss_iov_count/ss_marker_count - same kernel-stack concern
+ *     as the rx path.
+ */
+static int iscsit_do_tx_data(
+       struct iscsi_conn *conn,
+       struct iscsi_data_count *count)
+{
+       int data = count->data_length, total_tx = 0, tx_loop = 0, iov_len;
+       u32 tx_marker_val[count->ss_marker_count], tx_marker_iov = 0;
+       struct kvec iov[count->ss_iov_count], *iov_p;
+       struct msghdr msg;
+
+       if (!conn || !conn->sock || !conn->conn_ops)
+               return -1;
+
+       if (data <= 0) {
+               pr_err("Data length is: %d\n", data);
+               return -1;
+       }
+
+       memset(&msg, 0, sizeof(struct msghdr));
+
+       if (count->sync_and_steering) {
+               int size = 0;
+               u32 i, orig_iov_count = 0;
+               u32 orig_iov_len = 0, orig_iov_loc = 0;
+               u32 iov_count = 0, per_iov_bytes = 0;
+               u32 *tx_marker, old_tx_marker = 0;
+               struct kvec *iov_record;
+
+               memset(&tx_marker_val, 0,
+                       count->ss_marker_count * sizeof(u32));
+               memset(&iov, 0, count->ss_iov_count * sizeof(struct kvec));
+
+               iov_record = count->iov;
+               orig_iov_count = count->iov_count;
+               tx_marker = &conn->if_marker;
+
+               i = 0;
+               size = data;
+               orig_iov_len = iov_record[orig_iov_loc].iov_len;
+               while (size > 0) {
+                       pr_debug("tx_data: #1 orig_iov_len %u,"
+                       " orig_iov_loc %u\n", orig_iov_len, orig_iov_loc);
+                       pr_debug("tx_data: #2 tx_marker %u, size"
+                               " %u\n", *tx_marker, size);
+
+                       /*
+                        * A marker position falls inside this iovec: split
+                        * it and interleave the marker value (bytes left to
+                        * the next PDU header) as two 32-bit halves.
+                        */
+                       if (orig_iov_len >= *tx_marker) {
+                               iov[iov_count].iov_len = *tx_marker;
+                               iov[iov_count++].iov_base =
+                                       (iov_record[orig_iov_loc].iov_base +
+                                               per_iov_bytes);
+
+                               tx_marker_val[tx_marker_iov] =
+                                               (size - *tx_marker);
+                               iov[iov_count].iov_len = (MARKER_SIZE / 2);
+                               iov[iov_count++].iov_base =
+                                       &tx_marker_val[tx_marker_iov++];
+                               iov[iov_count].iov_len = (MARKER_SIZE / 2);
+                               iov[iov_count++].iov_base =
+                                       &tx_marker_val[tx_marker_iov++];
+                               old_tx_marker = *tx_marker;
+
+                               /*
+                                * IFMarkInt is in 32-bit words.
+                                */
+                               *tx_marker = (conn->conn_ops->IFMarkInt * 4);
+                               size -= old_tx_marker;
+                               orig_iov_len -= old_tx_marker;
+                               per_iov_bytes += old_tx_marker;
+
+                               pr_debug("tx_data: #3 new_tx_marker"
+                                       " %u, size %u\n", *tx_marker, size);
+                               pr_debug("tx_data: #4 offset %u\n",
+                                       tx_marker_val[tx_marker_iov-1]);
+                       } else {
+                               /* No marker in this iovec; copy it whole. */
+                               iov[iov_count].iov_len = orig_iov_len;
+                               iov[iov_count++].iov_base
+                                       = (iov_record[orig_iov_loc].iov_base +
+                                               per_iov_bytes);
+
+                               per_iov_bytes = 0;
+                               *tx_marker -= orig_iov_len;
+                               size -= orig_iov_len;
+
+                               if (size)
+                                       orig_iov_len =
+                                       iov_record[++orig_iov_loc].iov_len;
+
+                               pr_debug("tx_data: #5 new_tx_marker"
+                                       " %u, size %u\n", *tx_marker, size);
+                       }
+               }
+
+               /* The marker bytes go out on the wire as well. */
+               data += (tx_marker_iov * (MARKER_SIZE / 2));
+
+               iov_p = &iov[0];
+               iov_len = iov_count;
+
+               if (iov_count > count->ss_iov_count) {
+                       pr_err("iov_count: %d, count->ss_iov_count:"
+                               " %d\n", iov_count, count->ss_iov_count);
+                       return -1;
+               }
+               if (tx_marker_iov > count->ss_marker_count) {
+                       pr_err("tx_marker_iov: %d, count->ss_marker"
+                               "_count: %d\n", tx_marker_iov,
+                               count->ss_marker_count);
+                       return -1;
+               }
+       } else {
+               iov_p = count->iov;
+               iov_len = count->iov_count;
+       }
+
+       while (total_tx < data) {
+               tx_loop = kernel_sendmsg(conn->sock, &msg, iov_p, iov_len,
+                                       (data - total_tx));
+               if (tx_loop <= 0) {
+                       pr_debug("tx_loop: %d total_tx %d\n",
+                               tx_loop, total_tx);
+                       return tx_loop;
+               }
+               total_tx += tx_loop;
+               pr_debug("tx_loop: %d, total_tx: %d, data: %d\n",
+                                       tx_loop, total_tx, data);
+       }
+
+       /* Report payload bytes only, excluding interleaved markers. */
+       if (count->sync_and_steering)
+               total_tx -= (tx_marker_iov * (MARKER_SIZE / 2));
+
+       return total_tx;
+}
+
+/*
+ *     Receive 'data' bytes from the connection's socket into 'iov',
+ *     transparently accounting for sync-and-steering markers when
+ *     OFMarker was negotiated on a logged-in connection.
+ */
+int rx_data(
+       struct iscsi_conn *conn,
+       struct kvec *iov,
+       int iov_count,
+       int data)
+{
+       struct iscsi_data_count c = {
+               .iov            = iov,
+               .iov_count      = iov_count,
+               .data_length    = data,
+               .type           = ISCSI_RX_DATA,
+       };
+
+       if (!conn || !conn->sock || !conn->conn_ops)
+               return -1;
+
+       if (conn->conn_ops->OFMarker &&
+          (conn->conn_state >= TARG_CONN_STATE_LOGGED_IN)) {
+               if (iscsit_determine_sync_and_steering_counts(conn, &c) < 0)
+                       return -1;
+       }
+
+       return iscsit_do_rx_data(conn, &c);
+}
+
+int tx_data(
+       struct iscsi_conn *conn,
+       struct kvec *iov,
+       int iov_count,
+       int data)
+{
+       struct iscsi_data_count c;
+
+       if (!conn || !conn->sock || !conn->conn_ops)
+               return -1;
+
+       memset(&c, 0, sizeof(struct iscsi_data_count));
+       c.iov = iov;
+       c.iov_count = iov_count;
+       c.data_length = data;
+       c.type = ISCSI_TX_DATA;
+
+       if (conn->conn_ops->IFMarker &&
+          (conn->conn_state >= TARG_CONN_STATE_LOGGED_IN)) {
+               if (iscsit_determine_sync_and_steering_counts(conn, &c) < 0)
+                       return -1;
+       }
+
+       return iscsit_do_tx_data(conn, &c);
+}
+
+void iscsit_collect_login_stats(
+       struct iscsi_conn *conn,
+       u8 status_class,
+       u8 status_detail)
+{
+       struct iscsi_param *intrname = NULL;
+       struct iscsi_tiqn *tiqn;
+       struct iscsi_login_stats *ls;
+
+       tiqn = iscsit_snmp_get_tiqn(conn);
+       if (!tiqn)
+               return;
+
+       ls = &tiqn->login_stats;
+
+       spin_lock(&ls->lock);
+       if (!strcmp(conn->login_ip, ls->last_intr_fail_ip_addr) &&
+           ((get_jiffies_64() - ls->last_fail_time) < 10)) {
+               /* We already have the failure info for this login */
+               spin_unlock(&ls->lock);
+               return;
+       }
+
+       if (status_class == ISCSI_STATUS_CLS_SUCCESS)
+               ls->accepts++;
+       else if (status_class == ISCSI_STATUS_CLS_REDIRECT) {
+               ls->redirects++;
+               ls->last_fail_type = ISCSI_LOGIN_FAIL_REDIRECT;
+       } else if ((status_class == ISCSI_STATUS_CLS_INITIATOR_ERR)  &&
+                (status_detail == ISCSI_LOGIN_STATUS_AUTH_FAILED)) {
+               ls->authenticate_fails++;
+               ls->last_fail_type =  ISCSI_LOGIN_FAIL_AUTHENTICATE;
+       } else if ((status_class == ISCSI_STATUS_CLS_INITIATOR_ERR)  &&
+                (status_detail == ISCSI_LOGIN_STATUS_TGT_FORBIDDEN)) {
+               ls->authorize_fails++;
+               ls->last_fail_type = ISCSI_LOGIN_FAIL_AUTHORIZE;
+       } else if ((status_class == ISCSI_STATUS_CLS_INITIATOR_ERR) &&
+                (status_detail == ISCSI_LOGIN_STATUS_INIT_ERR)) {
+               ls->negotiate_fails++;
+               ls->last_fail_type = ISCSI_LOGIN_FAIL_NEGOTIATE;
+       } else {
+               ls->other_fails++;
+               ls->last_fail_type = ISCSI_LOGIN_FAIL_OTHER;
+       }
+
+       /* Save initiator name, ip address and time, if it is a failed login */
+       if (status_class != ISCSI_STATUS_CLS_SUCCESS) {
+               if (conn->param_list)
+                       intrname = iscsi_find_param_from_key(INITIATORNAME,
+                                                            conn->param_list);
+               strcpy(ls->last_intr_fail_name,
+                      (intrname ? intrname->value : "Unknown"));
+
+               ls->last_intr_fail_ip_family = conn->sock->sk->sk_family;
+               snprintf(ls->last_intr_fail_ip_addr, IPV6_ADDRESS_SPACE,
+                               "%s", conn->login_ip);
+               ls->last_fail_time = get_jiffies_64();
+       }
+
+       spin_unlock(&ls->lock);
+}
+
+struct iscsi_tiqn *iscsit_snmp_get_tiqn(struct iscsi_conn *conn)
+{
+       struct iscsi_portal_group *tpg;
+
+       if (!conn || !conn->sess)
+               return NULL;
+
+       tpg = conn->sess->tpg;
+       if (!tpg)
+               return NULL;
+
+       if (!tpg->tpg_tiqn)
+               return NULL;
+
+       return tpg->tpg_tiqn;
+}
diff --git a/drivers/target/iscsi/iscsi_target_util.h b/drivers/target/iscsi/iscsi_target_util.h
new file mode 100644 (file)
index 0000000..2cd49d6
--- /dev/null
@@ -0,0 +1,60 @@
+#ifndef ISCSI_TARGET_UTIL_H
+#define ISCSI_TARGET_UTIL_H
+
+#define MARKER_SIZE    8
+
+extern int iscsit_add_r2t_to_list(struct iscsi_cmd *, u32, u32, int, u32);
+extern struct iscsi_r2t *iscsit_get_r2t_for_eos(struct iscsi_cmd *, u32, u32);
+extern struct iscsi_r2t *iscsit_get_r2t_from_list(struct iscsi_cmd *);
+extern void iscsit_free_r2t(struct iscsi_r2t *, struct iscsi_cmd *);
+extern void iscsit_free_r2ts_from_list(struct iscsi_cmd *);
+extern struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *, gfp_t);
+extern struct iscsi_cmd *iscsit_allocate_se_cmd(struct iscsi_conn *, u32, int, int);
+extern struct iscsi_cmd *iscsit_allocate_se_cmd_for_tmr(struct iscsi_conn *, u8);
+extern int iscsit_decide_list_to_build(struct iscsi_cmd *, u32);
+extern struct iscsi_seq *iscsit_get_seq_holder_for_datain(struct iscsi_cmd *, u32);
+extern struct iscsi_seq *iscsit_get_seq_holder_for_r2t(struct iscsi_cmd *);
+extern struct iscsi_r2t *iscsit_get_holder_for_r2tsn(struct iscsi_cmd *, u32);
+int iscsit_sequence_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, u32 cmdsn);
+extern int iscsit_check_unsolicited_dataout(struct iscsi_cmd *, unsigned char *);
+extern struct iscsi_cmd *iscsit_find_cmd_from_itt(struct iscsi_conn *, u32);
+extern struct iscsi_cmd *iscsit_find_cmd_from_itt_or_dump(struct iscsi_conn *,
+                       u32, u32);
+extern struct iscsi_cmd *iscsit_find_cmd_from_ttt(struct iscsi_conn *, u32);
+extern int iscsit_find_cmd_for_recovery(struct iscsi_session *, struct iscsi_cmd **,
+                       struct iscsi_conn_recovery **, u32);
+extern void iscsit_add_cmd_to_immediate_queue(struct iscsi_cmd *, struct iscsi_conn *, u8);
+extern struct iscsi_queue_req *iscsit_get_cmd_from_immediate_queue(struct iscsi_conn *);
+extern void iscsit_add_cmd_to_response_queue(struct iscsi_cmd *, struct iscsi_conn *, u8);
+extern struct iscsi_queue_req *iscsit_get_cmd_from_response_queue(struct iscsi_conn *);
+extern void iscsit_remove_cmd_from_tx_queues(struct iscsi_cmd *, struct iscsi_conn *);
+extern void iscsit_free_queue_reqs_for_conn(struct iscsi_conn *);
+extern void iscsit_release_cmd(struct iscsi_cmd *);
+extern int iscsit_check_session_usage_count(struct iscsi_session *);
+extern void iscsit_dec_session_usage_count(struct iscsi_session *);
+extern void iscsit_inc_session_usage_count(struct iscsi_session *);
+extern int iscsit_set_sync_and_steering_values(struct iscsi_conn *);
+extern struct iscsi_conn *iscsit_get_conn_from_cid(struct iscsi_session *, u16);
+extern struct iscsi_conn *iscsit_get_conn_from_cid_rcfr(struct iscsi_session *, u16);
+extern void iscsit_check_conn_usage_count(struct iscsi_conn *);
+extern void iscsit_dec_conn_usage_count(struct iscsi_conn *);
+extern void iscsit_inc_conn_usage_count(struct iscsi_conn *);
+extern void iscsit_mod_nopin_response_timer(struct iscsi_conn *);
+extern void iscsit_start_nopin_response_timer(struct iscsi_conn *);
+extern void iscsit_stop_nopin_response_timer(struct iscsi_conn *);
+extern void __iscsit_start_nopin_timer(struct iscsi_conn *);
+extern void iscsit_start_nopin_timer(struct iscsi_conn *);
+extern void iscsit_stop_nopin_timer(struct iscsi_conn *);
+extern int iscsit_send_tx_data(struct iscsi_cmd *, struct iscsi_conn *, int);
+extern int iscsit_fe_sendpage_sg(struct iscsi_cmd *, struct iscsi_conn *);
+extern int iscsit_tx_login_rsp(struct iscsi_conn *, u8, u8);
+extern void iscsit_print_session_params(struct iscsi_session *);
+extern int iscsit_print_dev_to_proc(char *, char **, off_t, int);
+extern int iscsit_print_sessions_to_proc(char *, char **, off_t, int);
+extern int iscsit_print_tpg_to_proc(char *, char **, off_t, int);
+extern int rx_data(struct iscsi_conn *, struct kvec *, int, int);
+extern int tx_data(struct iscsi_conn *, struct kvec *, int, int);
+extern void iscsit_collect_login_stats(struct iscsi_conn *, u8, u8);
+extern struct iscsi_tiqn *iscsit_snmp_get_tiqn(struct iscsi_conn *);
+
+#endif /*** ISCSI_TARGET_UTIL_H ***/
index 46352d6..c75a01a 100644 (file)
@@ -4052,17 +4052,16 @@ static int transport_allocate_data_tasks(
        struct se_task *task;
        struct se_device *dev = cmd->se_dev;
        unsigned long flags;
-       sector_t sectors;
        int task_count, i, ret;
-       sector_t dev_max_sectors = dev->se_sub_dev->se_dev_attrib.max_sectors;
+       sector_t sectors, dev_max_sectors = dev->se_sub_dev->se_dev_attrib.max_sectors;
        u32 sector_size = dev->se_sub_dev->se_dev_attrib.block_size;
        struct scatterlist *sg;
        struct scatterlist *cmd_sg;
 
        WARN_ON(cmd->data_length % sector_size);
        sectors = DIV_ROUND_UP(cmd->data_length, sector_size);
-       task_count = DIV_ROUND_UP(sectors, dev_max_sectors);
-
+       task_count = DIV_ROUND_UP_SECTOR_T(sectors, dev_max_sectors);
+
        cmd_sg = sgl;
        for (i = 0; i < task_count; i++) {
                unsigned int task_size;
index 21d816e..f441726 100644 (file)
@@ -28,6 +28,17 @@ menuconfig WATCHDOG
 
 if WATCHDOG
 
+config WATCHDOG_CORE
+       bool "WatchDog Timer Driver Core"
+       ---help---
+         Say Y here if you want to use the new watchdog timer driver core.
+         This driver provides a framework for all watchdog timer drivers
+         and gives them the /dev/watchdog interface (and later also the
+         sysfs interface).
+
+         To compile this driver as a module, choose M here: the module will
+         be called watchdog.
+
 config WATCHDOG_NOWAYOUT
        bool "Disable watchdog shutdown on close"
        help
@@ -186,6 +197,15 @@ config SA1100_WATCHDOG
          To compile this driver as a module, choose M here: the
          module will be called sa1100_wdt.
 
+config DW_WATCHDOG
+       tristate "Synopsys DesignWare watchdog"
+       depends on ARM && HAVE_CLK
+       help
+         Say Y here to include support for the Synopsys DesignWare
+         watchdog timer found in many ARM chips.
+         To compile this driver as a module, choose M here: the
+         module will be called dw_wdt.
+
 config MPCORE_WATCHDOG
        tristate "MPcore watchdog"
        depends on HAVE_ARM_TWD
@@ -321,7 +341,7 @@ config MAX63XX_WATCHDOG
 
 config IMX2_WDT
        tristate "IMX2+ Watchdog"
-       depends on ARCH_MX2 || ARCH_MX25 || ARCH_MX3 || ARCH_MX5
+       depends on IMX_HAVE_PLATFORM_IMX2_WDT
        help
          This is the driver for the hardware watchdog
          on the Freescale IMX2 and later processors.
@@ -879,6 +899,20 @@ config M54xx_WATCHDOG
          To compile this driver as a module, choose M here: the
          module will be called m54xx_wdt.
 
+# MicroBlaze Architecture
+
+config XILINX_WATCHDOG
+       tristate "Xilinx Watchdog timer"
+       depends on MICROBLAZE
+       ---help---
+         Watchdog driver for the xps_timebase_wdt ip core.
+
+         IMPORTANT: The xps_timebase_wdt parent must have the property
+         "clock-frequency" at device tree.
+
+         To compile this driver as a module, choose M here: the
+         module will be called of_xilinx_wdt.
+
 # MIPS Architecture
 
 config ATH79_WDT
index ed26f70..55bd574 100644 (file)
@@ -2,6 +2,10 @@
 # Makefile for the WatchDog device drivers.
 #
 
+# The WatchDog Timer Driver Core.
+watchdog-objs  += watchdog_core.o watchdog_dev.o
+obj-$(CONFIG_WATCHDOG_CORE)    += watchdog.o
+
 # Only one watchdog can succeed. We probe the ISA/PCI/USB based
 # watchdog-cards first, then the architecture specific watchdog
 # drivers and then the architecture independent "softdog" driver.
@@ -37,6 +41,7 @@ obj-$(CONFIG_IXP4XX_WATCHDOG) += ixp4xx_wdt.o
 obj-$(CONFIG_KS8695_WATCHDOG) += ks8695_wdt.o
 obj-$(CONFIG_S3C2410_WATCHDOG) += s3c2410_wdt.o
 obj-$(CONFIG_SA1100_WATCHDOG) += sa1100_wdt.o
+obj-$(CONFIG_DW_WATCHDOG) += dw_wdt.o
 obj-$(CONFIG_MPCORE_WATCHDOG) += mpcore_wdt.o
 obj-$(CONFIG_EP93XX_WATCHDOG) += ep93xx_wdt.o
 obj-$(CONFIG_PNX4008_WATCHDOG) += pnx4008_wdt.o
@@ -109,6 +114,9 @@ obj-$(CONFIG_INTEL_SCU_WATCHDOG) += intel_scu_watchdog.o
 # M68K Architecture
 obj-$(CONFIG_M54xx_WATCHDOG) += m54xx_wdt.o
 
+# MicroBlaze Architecture
+obj-$(CONFIG_XILINX_WATCHDOG) += of_xilinx_wdt.o
+
 # MIPS Architecture
 obj-$(CONFIG_ATH79_WDT) += ath79_wdt.o
 obj-$(CONFIG_BCM47XX_WDT) += bcm47xx_wdt.o
index eac2602..87445b2 100644 (file)
@@ -31,7 +31,7 @@
 #include <linux/bitops.h>
 #include <linux/uaccess.h>
 
-#include <mach/at91_wdt.h>
+#include "at91sam9_wdt.h"
 
 #define DRV_NAME "AT91SAM9 Watchdog"
 
@@ -284,27 +284,8 @@ static int __exit at91wdt_remove(struct platform_device *pdev)
        return res;
 }
 
-#ifdef CONFIG_PM
-
-static int at91wdt_suspend(struct platform_device *pdev, pm_message_t message)
-{
-       return 0;
-}
-
-static int at91wdt_resume(struct platform_device *pdev)
-{
-       return 0;
-}
-
-#else
-#define at91wdt_suspend        NULL
-#define at91wdt_resume NULL
-#endif
-
 static struct platform_driver at91wdt_driver = {
        .remove         = __exit_p(at91wdt_remove),
-       .suspend        = at91wdt_suspend,
-       .resume         = at91wdt_resume,
        .driver         = {
                .name   = "at91_wdt",
                .owner  = THIS_MODULE,
similarity index 96%
rename from arch/arm/mach-at91/include/mach/at91_wdt.h
rename to drivers/watchdog/at91sam9_wdt.h
index fecc2e9..757f9ca 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * arch/arm/mach-at91/include/mach/at91_wdt.h
+ * drivers/watchdog/at91sam9_wdt.h
  *
  * Copyright (C) 2007 Andrew Victor
  * Copyright (C) 2007 Atmel Corporation.
diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c
new file mode 100644 (file)
index 0000000..f10f8c0
--- /dev/null
@@ -0,0 +1,376 @@
+/*
+ * Copyright 2010-2011 Picochip Ltd., Jamie Iles
+ * http://www.picochip.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This file implements a driver for the Synopsys DesignWare watchdog device
+ * in the many ARM subsystems. The watchdog has 16 different timeout periods
+ * and these are a function of the input clock frequency.
+ *
+ * The DesignWare watchdog cannot be stopped once it has been started so we
+ * use a software timer to implement a ping that will keep the watchdog alive.
+ * If we receive an expected close for the watchdog then we keep the timer
+ * running, otherwise the timer is stopped and the watchdog will expire.
+ */
+#define pr_fmt(fmt) "dw_wdt: " fmt
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pm.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/timer.h>
+#include <linux/uaccess.h>
+#include <linux/watchdog.h>
+
+#define WDOG_CONTROL_REG_OFFSET                    0x00
+#define WDOG_CONTROL_REG_WDT_EN_MASK       0x01
+#define WDOG_TIMEOUT_RANGE_REG_OFFSET      0x04
+#define WDOG_CURRENT_COUNT_REG_OFFSET      0x08
+#define WDOG_COUNTER_RESTART_REG_OFFSET     0x0c
+#define WDOG_COUNTER_RESTART_KICK_VALUE            0x76
+
+/* The maximum TOP (timeout period) value that can be set in the watchdog. */
+#define DW_WDT_MAX_TOP         15
+
+static int nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, int, 0);
+MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started "
+                "(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+#define WDT_TIMEOUT            (HZ / 2)
+
+static struct {
+       spinlock_t              lock;
+       void __iomem            *regs;
+       struct clk              *clk;
+       unsigned long           in_use;
+       unsigned long           next_heartbeat;
+       struct timer_list       timer;
+       int                     expect_close;
+} dw_wdt;
+
+static inline int dw_wdt_is_enabled(void)
+{
+       return readl(dw_wdt.regs + WDOG_CONTROL_REG_OFFSET) &
+               WDOG_CONTROL_REG_WDT_EN_MASK;
+}
+
+static inline int dw_wdt_top_in_seconds(unsigned top)
+{
+       /*
+        * There are 16 possible timeout values in 0..15 where the number of
+        * cycles is 2 ^ (16 + i) and the watchdog counts down.
+        */
+       return (1 << (16 + top)) / clk_get_rate(dw_wdt.clk);
+}
+
+static int dw_wdt_get_top(void)
+{
+       int top = readl(dw_wdt.regs + WDOG_TIMEOUT_RANGE_REG_OFFSET) & 0xF;
+
+       return dw_wdt_top_in_seconds(top);
+}
+
+static inline void dw_wdt_set_next_heartbeat(void)
+{
+       dw_wdt.next_heartbeat = jiffies + dw_wdt_get_top() * HZ;
+}
+
+static int dw_wdt_set_top(unsigned top_s)
+{
+       int i, top_val = DW_WDT_MAX_TOP;
+
+       /*
+        * Iterate over the timeout values until we find the closest match. We
+        * always look for >=.
+        */
+       for (i = 0; i <= DW_WDT_MAX_TOP; ++i)
+               if (dw_wdt_top_in_seconds(i) >= top_s) {
+                       top_val = i;
+                       break;
+               }
+
+       /* Set the new value in the watchdog. */
+       writel(top_val, dw_wdt.regs + WDOG_TIMEOUT_RANGE_REG_OFFSET);
+
+       dw_wdt_set_next_heartbeat();
+
+       return dw_wdt_top_in_seconds(top_val);
+}
+
+static void dw_wdt_keepalive(void)
+{
+       writel(WDOG_COUNTER_RESTART_KICK_VALUE, dw_wdt.regs +
+              WDOG_COUNTER_RESTART_REG_OFFSET);
+}
+
+static void dw_wdt_ping(unsigned long data)
+{
+       if (time_before(jiffies, dw_wdt.next_heartbeat) ||
+           (!nowayout && !dw_wdt.in_use)) {
+               dw_wdt_keepalive();
+               mod_timer(&dw_wdt.timer, jiffies + WDT_TIMEOUT);
+       } else
+               pr_crit("keepalive missed, machine will reset\n");
+}
+
+static int dw_wdt_open(struct inode *inode, struct file *filp)
+{
+       if (test_and_set_bit(0, &dw_wdt.in_use))
+               return -EBUSY;
+
+       /* Make sure we don't get unloaded. */
+       __module_get(THIS_MODULE);
+
+       spin_lock(&dw_wdt.lock);
+       if (!dw_wdt_is_enabled()) {
+               /*
+                * The watchdog is not currently enabled. Set the timeout to
+                * the maximum and then start it.
+                */
+               dw_wdt_set_top(DW_WDT_MAX_TOP);
+               writel(WDOG_CONTROL_REG_WDT_EN_MASK,
+                      dw_wdt.regs + WDOG_CONTROL_REG_OFFSET);
+       }
+
+       dw_wdt_set_next_heartbeat();
+
+       spin_unlock(&dw_wdt.lock);
+
+       return nonseekable_open(inode, filp);
+}
+
+ssize_t dw_wdt_write(struct file *filp, const char __user *buf, size_t len,
+                    loff_t *offset)
+{
+       if (!len)
+               return 0;
+
+       if (!nowayout) {
+               size_t i;
+
+               dw_wdt.expect_close = 0;
+
+               for (i = 0; i < len; ++i) {
+                       char c;
+
+                       if (get_user(c, buf + i))
+                               return -EFAULT;
+
+                       if (c == 'V') {
+                               dw_wdt.expect_close = 1;
+                               break;
+                       }
+               }
+       }
+
+       dw_wdt_set_next_heartbeat();
+       mod_timer(&dw_wdt.timer, jiffies + WDT_TIMEOUT);
+
+       return len;
+}
+
+static u32 dw_wdt_time_left(void)
+{
+       return readl(dw_wdt.regs + WDOG_CURRENT_COUNT_REG_OFFSET) /
+               clk_get_rate(dw_wdt.clk);
+}
+
+static const struct watchdog_info dw_wdt_ident = {
+       .options        = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT |
+                         WDIOF_MAGICCLOSE,
+       .identity       = "Synopsys DesignWare Watchdog",
+};
+
+static long dw_wdt_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+       unsigned long val;
+       int timeout;
+
+       switch (cmd) {
+       case WDIOC_GETSUPPORT:
+               return copy_to_user((struct watchdog_info *)arg, &dw_wdt_ident,
+                                   sizeof(dw_wdt_ident)) ? -EFAULT : 0;
+
+       case WDIOC_GETSTATUS:
+       case WDIOC_GETBOOTSTATUS:
+               return put_user(0, (int *)arg);
+
+       case WDIOC_KEEPALIVE:
+               dw_wdt_set_next_heartbeat();
+               return 0;
+
+       case WDIOC_SETTIMEOUT:
+               if (get_user(val, (int __user *)arg))
+                       return -EFAULT;
+               timeout = dw_wdt_set_top(val);
+               return put_user(timeout , (int __user *)arg);
+
+       case WDIOC_GETTIMEOUT:
+               return put_user(dw_wdt_get_top(), (int __user *)arg);
+
+       case WDIOC_GETTIMELEFT:
+               /* Get the time left until expiry. */
+               if (get_user(val, (int __user *)arg))
+                       return -EFAULT;
+               return put_user(dw_wdt_time_left(), (int __user *)arg);
+
+       default:
+               return -ENOTTY;
+       }
+}
+
+static int dw_wdt_release(struct inode *inode, struct file *filp)
+{
+       clear_bit(0, &dw_wdt.in_use);
+
+       if (!dw_wdt.expect_close) {
+               del_timer(&dw_wdt.timer);
+
+               if (!nowayout)
+                       pr_crit("unexpected close, system will reboot soon\n");
+               else
+                       pr_crit("watchdog cannot be disabled, system will reboot soon\n");
+       }
+
+       dw_wdt.expect_close = 0;
+
+       return 0;
+}
+
+#ifdef CONFIG_PM
+static int dw_wdt_suspend(struct device *dev)
+{
+       clk_disable(dw_wdt.clk);
+
+       return 0;
+}
+
+static int dw_wdt_resume(struct device *dev)
+{
+       int err = clk_enable(dw_wdt.clk);
+
+       if (err)
+               return err;
+
+       dw_wdt_keepalive();
+
+       return 0;
+}
+
+static const struct dev_pm_ops dw_wdt_pm_ops = {
+       .suspend        = dw_wdt_suspend,
+       .resume         = dw_wdt_resume,
+};
+#endif /* CONFIG_PM */
+
+static const struct file_operations wdt_fops = {
+       .owner          = THIS_MODULE,
+       .llseek         = no_llseek,
+       .open           = dw_wdt_open,
+       .write          = dw_wdt_write,
+       .unlocked_ioctl = dw_wdt_ioctl,
+       .release        = dw_wdt_release
+};
+
+static struct miscdevice dw_wdt_miscdev = {
+       .fops           = &wdt_fops,
+       .name           = "watchdog",
+       .minor          = WATCHDOG_MINOR,
+};
+
+static int __devinit dw_wdt_drv_probe(struct platform_device *pdev)
+{
+       int ret;
+       struct resource *mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+       if (!mem)
+               return -EINVAL;
+
+       if (!devm_request_mem_region(&pdev->dev, mem->start, resource_size(mem),
+                                    "dw_wdt"))
+               return -ENOMEM;
+
+       dw_wdt.regs = devm_ioremap(&pdev->dev, mem->start, resource_size(mem));
+       if (!dw_wdt.regs)
+               return -ENOMEM;
+
+       dw_wdt.clk = clk_get(&pdev->dev, NULL);
+       if (IS_ERR(dw_wdt.clk))
+               return PTR_ERR(dw_wdt.clk);
+
+       ret = clk_enable(dw_wdt.clk);
+       if (ret)
+               goto out_put_clk;
+
+       spin_lock_init(&dw_wdt.lock);
+
+       ret = misc_register(&dw_wdt_miscdev);
+       if (ret)
+               goto out_disable_clk;
+
+       dw_wdt_set_next_heartbeat();
+       setup_timer(&dw_wdt.timer, dw_wdt_ping, 0);
+       mod_timer(&dw_wdt.timer, jiffies + WDT_TIMEOUT);
+
+       return 0;
+
+out_disable_clk:
+       clk_disable(dw_wdt.clk);
+out_put_clk:
+       clk_put(dw_wdt.clk);
+
+       return ret;
+}
+
+static int __devexit dw_wdt_drv_remove(struct platform_device *pdev)
+{
+       misc_deregister(&dw_wdt_miscdev);
+
+       clk_disable(dw_wdt.clk);
+       clk_put(dw_wdt.clk);
+
+       return 0;
+}
+
+static struct platform_driver dw_wdt_driver = {
+       .probe          = dw_wdt_drv_probe,
+       .remove         = __devexit_p(dw_wdt_drv_remove),
+       .driver         = {
+               .name   = "dw_wdt",
+               .owner  = THIS_MODULE,
+#ifdef CONFIG_PM
+               .pm     = &dw_wdt_pm_ops,
+#endif /* CONFIG_PM */
+       },
+};
+
+static int __init dw_wdt_watchdog_init(void)
+{
+       return platform_driver_register(&dw_wdt_driver);
+}
+module_init(dw_wdt_watchdog_init);
+
+static void __exit dw_wdt_watchdog_exit(void)
+{
+       platform_driver_unregister(&dw_wdt_driver);
+}
+module_exit(dw_wdt_watchdog_exit);
+
+MODULE_AUTHOR("Jamie Iles");
+MODULE_DESCRIPTION("Synopsys DesignWare Watchdog Driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
index 8cb2685..410fba4 100644 (file)
@@ -36,7 +36,7 @@
 #include <asm/cacheflush.h>
 #endif /* CONFIG_HPWDT_NMI_DECODING */
 
-#define HPWDT_VERSION                  "1.2.0"
+#define HPWDT_VERSION                  "1.3.0"
 #define SECS_TO_TICKS(secs)            ((secs) * 1000 / 128)
 #define TICKS_TO_SECS(ticks)           ((ticks) * 128 / 1000)
 #define HPWDT_MAX_TIMER                        TICKS_TO_SECS(65535)
@@ -87,6 +87,19 @@ struct smbios_cru64_info {
 };
 #define SMBIOS_CRU64_INFORMATION       212
 
+/* type 219 */
+struct smbios_proliant_info {
+       u8 type;
+       u8 byte_length;
+       u16 handle;
+       u32 power_features;
+       u32 omega_features;
+       u32 reserved;
+       u32 misc_features;
+};
+#define SMBIOS_ICRU_INFORMATION                219
+
+
 struct cmn_registers {
        union {
                struct {
@@ -132,6 +145,7 @@ struct cmn_registers {
 static unsigned int hpwdt_nmi_decoding;
 static unsigned int allow_kdump;
 static unsigned int priority;          /* hpwdt at end of die_notify list */
+static unsigned int is_icru;
 static DEFINE_SPINLOCK(rom_lock);
 static void *cru_rom_addr;
 static struct cmn_registers cmn_regs;
@@ -476,19 +490,22 @@ static int hpwdt_pretimeout(struct notifier_block *nb, unsigned long ulReason,
                goto out;
 
        spin_lock_irqsave(&rom_lock, rom_pl);
-       if (!die_nmi_called)
+       if (!die_nmi_called && !is_icru)
                asminline_call(&cmn_regs, cru_rom_addr);
        die_nmi_called = 1;
        spin_unlock_irqrestore(&rom_lock, rom_pl);
-       if (cmn_regs.u1.ral == 0) {
-               printk(KERN_WARNING "hpwdt: An NMI occurred, "
-                       "but unable to determine source.\n");
-       } else {
-               if (allow_kdump)
-                       hpwdt_stop();
-               panic("An NMI occurred, please see the Integrated "
-                       "Management Log for details.\n");
+       if (!is_icru) {
+               if (cmn_regs.u1.ral == 0) {
+                       printk(KERN_WARNING "hpwdt: An NMI occurred, "
+                               "but unable to determine source.\n");
+               }
        }
+
+       if (allow_kdump)
+               hpwdt_stop();
+       panic("An NMI occurred, please see the Integrated "
+               "Management Log for details.\n");
+
 out:
        return NOTIFY_OK;
 }
@@ -659,30 +676,63 @@ static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev)
 }
 #endif /* CONFIG_X86_LOCAL_APIC */
 
+/*
+ *     dmi_find_icru
+ *
+ *     Routine Description:
+ *     This function checks whether or not we are on an iCRU-based server.
+ *     This check is independent of architecture and needs to be made for
+ *     any ProLiant system.
+ */
+static void __devinit dmi_find_icru(const struct dmi_header *dm, void *dummy)
+{
+       struct smbios_proliant_info *smbios_proliant_ptr;
+
+       if (dm->type == SMBIOS_ICRU_INFORMATION) {
+               smbios_proliant_ptr = (struct smbios_proliant_info *) dm;
+               if (smbios_proliant_ptr->misc_features & 0x01)
+                       is_icru = 1;
+       }
+}
+
 static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev)
 {
        int retval;
 
        /*
-        * We need to map the ROM to get the CRU service.
-        * For 32 bit Operating Systems we need to go through the 32 Bit
-        * BIOS Service Directory
-        * For 64 bit Operating Systems we get that service through SMBIOS.
+        * On typical CRU-based systems we need to map that service in
+        * the BIOS. For 32 bit Operating Systems we need to go through
+        * the 32 Bit BIOS Service Directory. For 64 bit Operating
+        * Systems we get that service through SMBIOS.
+        *
+        * On systems that support the new iCRU service all we need to
+        * do is call dmi_walk to get the supported flag value and skip
+        * the old cru detect code.
         */
-       retval = detect_cru_service();
-       if (retval < 0) {
-               dev_warn(&dev->dev,
-                       "Unable to detect the %d Bit CRU Service.\n",
-                       HPWDT_ARCH);
-               return retval;
-       }
+       dmi_walk(dmi_find_icru, NULL);
+       if (!is_icru) {
+
+               /*
+               * We need to map the ROM to get the CRU service.
+               * For 32 bit Operating Systems we need to go through the 32 Bit
+               * BIOS Service Directory
+               * For 64 bit Operating Systems we get that service through SMBIOS.
+               */
+               retval = detect_cru_service();
+               if (retval < 0) {
+                       dev_warn(&dev->dev,
+                               "Unable to detect the %d Bit CRU Service.\n",
+                               HPWDT_ARCH);
+                       return retval;
+               }
 
-       /*
-        * We know this is the only CRU call we need to make so lets keep as
-        * few instructions as possible once the NMI comes in.
-        */
-       cmn_regs.u1.rah = 0x0D;
-       cmn_regs.u1.ral = 0x02;
+               /*
+               * We know this is the only CRU call we need to make so lets keep as
+               * few instructions as possible once the NMI comes in.
+               */
+               cmn_regs.u1.rah = 0x0D;
+               cmn_regs.u1.ral = 0x02;
+       }
 
        /*
         * If the priority is set to 1, then we will be put first on the
index 5fd020d..751a591 100644 (file)
@@ -120,72 +120,12 @@ enum iTCO_chipsets {
        TCO_3420,       /* 3420 */
        TCO_3450,       /* 3450 */
        TCO_EP80579,    /* EP80579 */
-       TCO_CPT1,       /* Cougar Point */
-       TCO_CPT2,       /* Cougar Point Desktop */
-       TCO_CPT3,       /* Cougar Point Mobile */
-       TCO_CPT4,       /* Cougar Point */
-       TCO_CPT5,       /* Cougar Point */
-       TCO_CPT6,       /* Cougar Point */
-       TCO_CPT7,       /* Cougar Point */
-       TCO_CPT8,       /* Cougar Point */
-       TCO_CPT9,       /* Cougar Point */
-       TCO_CPT10,      /* Cougar Point */
-       TCO_CPT11,      /* Cougar Point */
-       TCO_CPT12,      /* Cougar Point */
-       TCO_CPT13,      /* Cougar Point */
-       TCO_CPT14,      /* Cougar Point */
-       TCO_CPT15,      /* Cougar Point */
-       TCO_CPT16,      /* Cougar Point */
-       TCO_CPT17,      /* Cougar Point */
-       TCO_CPT18,      /* Cougar Point */
-       TCO_CPT19,      /* Cougar Point */
-       TCO_CPT20,      /* Cougar Point */
-       TCO_CPT21,      /* Cougar Point */
-       TCO_CPT22,      /* Cougar Point */
-       TCO_CPT23,      /* Cougar Point */
-       TCO_CPT24,      /* Cougar Point */
-       TCO_CPT25,      /* Cougar Point */
-       TCO_CPT26,      /* Cougar Point */
-       TCO_CPT27,      /* Cougar Point */
-       TCO_CPT28,      /* Cougar Point */
-       TCO_CPT29,      /* Cougar Point */
-       TCO_CPT30,      /* Cougar Point */
-       TCO_CPT31,      /* Cougar Point */
-       TCO_PBG1,       /* Patsburg */
-       TCO_PBG2,       /* Patsburg */
+       TCO_CPT,        /* Cougar Point */
+       TCO_CPTD,       /* Cougar Point Desktop */
+       TCO_CPTM,       /* Cougar Point Mobile */
+       TCO_PBG,        /* Patsburg */
        TCO_DH89XXCC,   /* DH89xxCC */
-       TCO_PPT0,       /* Panther Point */
-       TCO_PPT1,       /* Panther Point */
-       TCO_PPT2,       /* Panther Point */
-       TCO_PPT3,       /* Panther Point */
-       TCO_PPT4,       /* Panther Point */
-       TCO_PPT5,       /* Panther Point */
-       TCO_PPT6,       /* Panther Point */
-       TCO_PPT7,       /* Panther Point */
-       TCO_PPT8,       /* Panther Point */
-       TCO_PPT9,       /* Panther Point */
-       TCO_PPT10,      /* Panther Point */
-       TCO_PPT11,      /* Panther Point */
-       TCO_PPT12,      /* Panther Point */
-       TCO_PPT13,      /* Panther Point */
-       TCO_PPT14,      /* Panther Point */
-       TCO_PPT15,      /* Panther Point */
-       TCO_PPT16,      /* Panther Point */
-       TCO_PPT17,      /* Panther Point */
-       TCO_PPT18,      /* Panther Point */
-       TCO_PPT19,      /* Panther Point */
-       TCO_PPT20,      /* Panther Point */
-       TCO_PPT21,      /* Panther Point */
-       TCO_PPT22,      /* Panther Point */
-       TCO_PPT23,      /* Panther Point */
-       TCO_PPT24,      /* Panther Point */
-       TCO_PPT25,      /* Panther Point */
-       TCO_PPT26,      /* Panther Point */
-       TCO_PPT27,      /* Panther Point */
-       TCO_PPT28,      /* Panther Point */
-       TCO_PPT29,      /* Panther Point */
-       TCO_PPT30,      /* Panther Point */
-       TCO_PPT31,      /* Panther Point */
+       TCO_PPT,        /* Panther Point */
 };
 
 static struct {
@@ -244,83 +184,14 @@ static struct {
        {"3450", 2},
        {"EP80579", 2},
        {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Cougar Point", 2},
-       {"Patsburg", 2},
+       {"Cougar Point Desktop", 2},
+       {"Cougar Point Mobile", 2},
        {"Patsburg", 2},
        {"DH89xxCC", 2},
        {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
-       {"Panther Point", 2},
        {NULL, 0}
 };
 
-#define ITCO_PCI_DEVICE(dev, data) \
-       .vendor = PCI_VENDOR_ID_INTEL,  \
-       .device = dev,                  \
-       .subvendor = PCI_ANY_ID,        \
-       .subdevice = PCI_ANY_ID,        \
-       .class = 0,                     \
-       .class_mask = 0,                \
-       .driver_data = data
-
 /*
  * This data only exists for exporting the supported PCI ids
  * via MODULE_DEVICE_TABLE.  We do not actually register a
@@ -328,138 +199,138 @@ static struct {
  * functions that probably will be registered by other drivers.
  */
 static DEFINE_PCI_DEVICE_TABLE(iTCO_wdt_pci_tbl) = {
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801AA_0,        TCO_ICH)},
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801AB_0,        TCO_ICH0)},
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801BA_0,        TCO_ICH2)},
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801BA_10,       TCO_ICH2M)},
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801CA_0,        TCO_ICH3)},
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801CA_12,       TCO_ICH3M)},
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801DB_0,        TCO_ICH4)},
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801DB_12,       TCO_ICH4M)},
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801E_0,         TCO_CICH)},
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801EB_0,        TCO_ICH5)},
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ESB_1,            TCO_6300ESB)},
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH6_0,           TCO_ICH6)},
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH6_1,           TCO_ICH6M)},
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH6_2,           TCO_ICH6W)},
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ESB2_0,           TCO_631XESB)},
-       { ITCO_PCI_DEVICE(0x2671,                               TCO_631XESB)},
-       { ITCO_PCI_DEVICE(0x2672,                               TCO_631XESB)},
-       { ITCO_PCI_DEVICE(0x2673,                               TCO_631XESB)},
-       { ITCO_PCI_DEVICE(0x2674,                               TCO_631XESB)},
-       { ITCO_PCI_DEVICE(0x2675,                               TCO_631XESB)},
-       { ITCO_PCI_DEVICE(0x2676,                               TCO_631XESB)},
-       { ITCO_PCI_DEVICE(0x2677,                               TCO_631XESB)},
-       { ITCO_PCI_DEVICE(0x2678,                               TCO_631XESB)},
-       { ITCO_PCI_DEVICE(0x2679,                               TCO_631XESB)},
-       { ITCO_PCI_DEVICE(0x267a,                               TCO_631XESB)},
-       { ITCO_PCI_DEVICE(0x267b,                               TCO_631XESB)},
-       { ITCO_PCI_DEVICE(0x267c,                               TCO_631XESB)},
-       { ITCO_PCI_DEVICE(0x267d,                               TCO_631XESB)},
-       { ITCO_PCI_DEVICE(0x267e,                               TCO_631XESB)},
-       { ITCO_PCI_DEVICE(0x267f,                               TCO_631XESB)},
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH7_0,           TCO_ICH7)},
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH7_30,          TCO_ICH7DH)},
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH7_1,           TCO_ICH7M)},
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH7_31,          TCO_ICH7MDH)},
-       { ITCO_PCI_DEVICE(0x27bc,                               TCO_NM10)},
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH8_0,           TCO_ICH8)},
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH8_2,           TCO_ICH8DH)},
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH8_3,           TCO_ICH8DO)},
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH8_4,           TCO_ICH8M)},
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH8_1,           TCO_ICH8ME)},
-       { ITCO_PCI_DEVICE(0x2918,                               TCO_ICH9)},
-       { ITCO_PCI_DEVICE(0x2916,                               TCO_ICH9R)},
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH9_2,           TCO_ICH9DH)},
-       { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH9_4,           TCO_ICH9DO)},
-       { ITCO_PCI_DEVICE(0x2919,                               TCO_ICH9M)},
-       { ITCO_PCI_DEVICE(0x2917,                               TCO_ICH9ME)},
-       { ITCO_PCI_DEVICE(0x3a18,                               TCO_ICH10)},
-       { ITCO_PCI_DEVICE(0x3a16,                               TCO_ICH10R)},
-       { ITCO_PCI_DEVICE(0x3a1a,                               TCO_ICH10D)},
-       { ITCO_PCI_DEVICE(0x3a14,                               TCO_ICH10DO)},
-       { ITCO_PCI_DEVICE(0x3b00,                               TCO_PCH)},
-       { ITCO_PCI_DEVICE(0x3b01,                               TCO_PCHM)},
-       { ITCO_PCI_DEVICE(0x3b02,                               TCO_P55)},
-       { ITCO_PCI_DEVICE(0x3b03,                               TCO_PM55)},
-       { ITCO_PCI_DEVICE(0x3b06,                               TCO_H55)},
-       { ITCO_PCI_DEVICE(0x3b07,                               TCO_QM57)},
-       { ITCO_PCI_DEVICE(0x3b08,                               TCO_H57)},
-       { ITCO_PCI_DEVICE(0x3b09,                               TCO_HM55)},
-       { ITCO_PCI_DEVICE(0x3b0a,                               TCO_Q57)},
-       { ITCO_PCI_DEVICE(0x3b0b,                               TCO_HM57)},
-       { ITCO_PCI_DEVICE(0x3b0d,                               TCO_PCHMSFF)},
-       { ITCO_PCI_DEVICE(0x3b0f,                               TCO_QS57)},
-       { ITCO_PCI_DEVICE(0x3b12,                               TCO_3400)},
-       { ITCO_PCI_DEVICE(0x3b14,                               TCO_3420)},
-       { ITCO_PCI_DEVICE(0x3b16,                               TCO_3450)},
-       { ITCO_PCI_DEVICE(0x5031,                               TCO_EP80579)},
-       { ITCO_PCI_DEVICE(0x1c41,                               TCO_CPT1)},
-       { ITCO_PCI_DEVICE(0x1c42,                               TCO_CPT2)},
-       { ITCO_PCI_DEVICE(0x1c43,                               TCO_CPT3)},
-       { ITCO_PCI_DEVICE(0x1c44,                               TCO_CPT4)},
-       { ITCO_PCI_DEVICE(0x1c45,                               TCO_CPT5)},
-       { ITCO_PCI_DEVICE(0x1c46,                               TCO_CPT6)},
-       { ITCO_PCI_DEVICE(0x1c47,                               TCO_CPT7)},
-       { ITCO_PCI_DEVICE(0x1c48,                               TCO_CPT8)},
-       { ITCO_PCI_DEVICE(0x1c49,                               TCO_CPT9)},
-       { ITCO_PCI_DEVICE(0x1c4a,                               TCO_CPT10)},
-       { ITCO_PCI_DEVICE(0x1c4b,                               TCO_CPT11)},
-       { ITCO_PCI_DEVICE(0x1c4c,                               TCO_CPT12)},
-       { ITCO_PCI_DEVICE(0x1c4d,                               TCO_CPT13)},
-       { ITCO_PCI_DEVICE(0x1c4e,                               TCO_CPT14)},
-       { ITCO_PCI_DEVICE(0x1c4f,                               TCO_CPT15)},
-       { ITCO_PCI_DEVICE(0x1c50,                               TCO_CPT16)},
-       { ITCO_PCI_DEVICE(0x1c51,                               TCO_CPT17)},
-       { ITCO_PCI_DEVICE(0x1c52,                               TCO_CPT18)},
-       { ITCO_PCI_DEVICE(0x1c53,                               TCO_CPT19)},
-       { ITCO_PCI_DEVICE(0x1c54,                               TCO_CPT20)},
-       { ITCO_PCI_DEVICE(0x1c55,                               TCO_CPT21)},
-       { ITCO_PCI_DEVICE(0x1c56,                               TCO_CPT22)},
-       { ITCO_PCI_DEVICE(0x1c57,                               TCO_CPT23)},
-       { ITCO_PCI_DEVICE(0x1c58,                               TCO_CPT24)},
-       { ITCO_PCI_DEVICE(0x1c59,                               TCO_CPT25)},
-       { ITCO_PCI_DEVICE(0x1c5a,                               TCO_CPT26)},
-       { ITCO_PCI_DEVICE(0x1c5b,                               TCO_CPT27)},
-       { ITCO_PCI_DEVICE(0x1c5c,                               TCO_CPT28)},
-       { ITCO_PCI_DEVICE(0x1c5d,                               TCO_CPT29)},
-       { ITCO_PCI_DEVICE(0x1c5e,                               TCO_CPT30)},
-       { ITCO_PCI_DEVICE(0x1c5f,                               TCO_CPT31)},
-       { ITCO_PCI_DEVICE(0x1d40,                               TCO_PBG1)},
-       { ITCO_PCI_DEVICE(0x1d41,                               TCO_PBG2)},
-       { ITCO_PCI_DEVICE(0x2310,                               TCO_DH89XXCC)},
-       { ITCO_PCI_DEVICE(0x1e40,                               TCO_PPT0)},
-       { ITCO_PCI_DEVICE(0x1e41,                               TCO_PPT1)},
-       { ITCO_PCI_DEVICE(0x1e42,                               TCO_PPT2)},
-       { ITCO_PCI_DEVICE(0x1e43,                               TCO_PPT3)},
-       { ITCO_PCI_DEVICE(0x1e44,                               TCO_PPT4)},
-       { ITCO_PCI_DEVICE(0x1e45,                               TCO_PPT5)},
-       { ITCO_PCI_DEVICE(0x1e46,                               TCO_PPT6)},
-       { ITCO_PCI_DEVICE(0x1e47,                               TCO_PPT7)},
-       { ITCO_PCI_DEVICE(0x1e48,                               TCO_PPT8)},
-       { ITCO_PCI_DEVICE(0x1e49,                               TCO_PPT9)},
-       { ITCO_PCI_DEVICE(0x1e4a,                               TCO_PPT10)},
-       { ITCO_PCI_DEVICE(0x1e4b,                               TCO_PPT11)},
-       { ITCO_PCI_DEVICE(0x1e4c,                               TCO_PPT12)},
-       { ITCO_PCI_DEVICE(0x1e4d,                               TCO_PPT13)},
-       { ITCO_PCI_DEVICE(0x1e4e,                               TCO_PPT14)},
-       { ITCO_PCI_DEVICE(0x1e4f,                               TCO_PPT15)},
-       { ITCO_PCI_DEVICE(0x1e50,                               TCO_PPT16)},
-       { ITCO_PCI_DEVICE(0x1e51,                               TCO_PPT17)},
-       { ITCO_PCI_DEVICE(0x1e52,                               TCO_PPT18)},
-       { ITCO_PCI_DEVICE(0x1e53,                               TCO_PPT19)},
-       { ITCO_PCI_DEVICE(0x1e54,                               TCO_PPT20)},
-       { ITCO_PCI_DEVICE(0x1e55,                               TCO_PPT21)},
-       { ITCO_PCI_DEVICE(0x1e56,                               TCO_PPT22)},
-       { ITCO_PCI_DEVICE(0x1e57,                               TCO_PPT23)},
-       { ITCO_PCI_DEVICE(0x1e58,                               TCO_PPT24)},
-       { ITCO_PCI_DEVICE(0x1e59,                               TCO_PPT25)},
-       { ITCO_PCI_DEVICE(0x1e5a,                               TCO_PPT26)},
-       { ITCO_PCI_DEVICE(0x1e5b,                               TCO_PPT27)},
-       { ITCO_PCI_DEVICE(0x1e5c,                               TCO_PPT28)},
-       { ITCO_PCI_DEVICE(0x1e5d,                               TCO_PPT29)},
-       { ITCO_PCI_DEVICE(0x1e5e,                               TCO_PPT30)},
-       { ITCO_PCI_DEVICE(0x1e5f,                               TCO_PPT31)},
+       { PCI_VDEVICE(INTEL, 0x2410), TCO_ICH},
+       { PCI_VDEVICE(INTEL, 0x2420), TCO_ICH0},
+       { PCI_VDEVICE(INTEL, 0x2440), TCO_ICH2},
+       { PCI_VDEVICE(INTEL, 0x244c), TCO_ICH2M},
+       { PCI_VDEVICE(INTEL, 0x2480), TCO_ICH3},
+       { PCI_VDEVICE(INTEL, 0x248c), TCO_ICH3M},
+       { PCI_VDEVICE(INTEL, 0x24c0), TCO_ICH4},
+       { PCI_VDEVICE(INTEL, 0x24cc), TCO_ICH4M},
+       { PCI_VDEVICE(INTEL, 0x2450), TCO_CICH},
+       { PCI_VDEVICE(INTEL, 0x24d0), TCO_ICH5},
+       { PCI_VDEVICE(INTEL, 0x25a1), TCO_6300ESB},
+       { PCI_VDEVICE(INTEL, 0x2640), TCO_ICH6},
+       { PCI_VDEVICE(INTEL, 0x2641), TCO_ICH6M},
+       { PCI_VDEVICE(INTEL, 0x2642), TCO_ICH6W},
+       { PCI_VDEVICE(INTEL, 0x2670), TCO_631XESB},
+       { PCI_VDEVICE(INTEL, 0x2671), TCO_631XESB},
+       { PCI_VDEVICE(INTEL, 0x2672), TCO_631XESB},
+       { PCI_VDEVICE(INTEL, 0x2673), TCO_631XESB},
+       { PCI_VDEVICE(INTEL, 0x2674), TCO_631XESB},
+       { PCI_VDEVICE(INTEL, 0x2675), TCO_631XESB},
+       { PCI_VDEVICE(INTEL, 0x2676), TCO_631XESB},
+       { PCI_VDEVICE(INTEL, 0x2677), TCO_631XESB},
+       { PCI_VDEVICE(INTEL, 0x2678), TCO_631XESB},
+       { PCI_VDEVICE(INTEL, 0x2679), TCO_631XESB},
+       { PCI_VDEVICE(INTEL, 0x267a), TCO_631XESB},
+       { PCI_VDEVICE(INTEL, 0x267b), TCO_631XESB},
+       { PCI_VDEVICE(INTEL, 0x267c), TCO_631XESB},
+       { PCI_VDEVICE(INTEL, 0x267d), TCO_631XESB},
+       { PCI_VDEVICE(INTEL, 0x267e), TCO_631XESB},
+       { PCI_VDEVICE(INTEL, 0x267f), TCO_631XESB},
+       { PCI_VDEVICE(INTEL, 0x27b8), TCO_ICH7},
+       { PCI_VDEVICE(INTEL, 0x27b0), TCO_ICH7DH},
+       { PCI_VDEVICE(INTEL, 0x27b9), TCO_ICH7M},
+       { PCI_VDEVICE(INTEL, 0x27bd), TCO_ICH7MDH},
+       { PCI_VDEVICE(INTEL, 0x27bc), TCO_NM10},
+       { PCI_VDEVICE(INTEL, 0x2810), TCO_ICH8},
+       { PCI_VDEVICE(INTEL, 0x2812), TCO_ICH8DH},
+       { PCI_VDEVICE(INTEL, 0x2814), TCO_ICH8DO},
+       { PCI_VDEVICE(INTEL, 0x2815), TCO_ICH8M},
+       { PCI_VDEVICE(INTEL, 0x2811), TCO_ICH8ME},
+       { PCI_VDEVICE(INTEL, 0x2918), TCO_ICH9},
+       { PCI_VDEVICE(INTEL, 0x2916), TCO_ICH9R},
+       { PCI_VDEVICE(INTEL, 0x2912), TCO_ICH9DH},
+       { PCI_VDEVICE(INTEL, 0x2914), TCO_ICH9DO},
+       { PCI_VDEVICE(INTEL, 0x2919), TCO_ICH9M},
+       { PCI_VDEVICE(INTEL, 0x2917), TCO_ICH9ME},
+       { PCI_VDEVICE(INTEL, 0x3a18), TCO_ICH10},
+       { PCI_VDEVICE(INTEL, 0x3a16), TCO_ICH10R},
+       { PCI_VDEVICE(INTEL, 0x3a1a), TCO_ICH10D},
+       { PCI_VDEVICE(INTEL, 0x3a14), TCO_ICH10DO},
+       { PCI_VDEVICE(INTEL, 0x3b00), TCO_PCH},
+       { PCI_VDEVICE(INTEL, 0x3b01), TCO_PCHM},
+       { PCI_VDEVICE(INTEL, 0x3b02), TCO_P55},
+       { PCI_VDEVICE(INTEL, 0x3b03), TCO_PM55},
+       { PCI_VDEVICE(INTEL, 0x3b06), TCO_H55},
+       { PCI_VDEVICE(INTEL, 0x3b07), TCO_QM57},
+       { PCI_VDEVICE(INTEL, 0x3b08), TCO_H57},
+       { PCI_VDEVICE(INTEL, 0x3b09), TCO_HM55},
+       { PCI_VDEVICE(INTEL, 0x3b0a), TCO_Q57},
+       { PCI_VDEVICE(INTEL, 0x3b0b), TCO_HM57},
+       { PCI_VDEVICE(INTEL, 0x3b0d), TCO_PCHMSFF},
+       { PCI_VDEVICE(INTEL, 0x3b0f), TCO_QS57},
+       { PCI_VDEVICE(INTEL, 0x3b12), TCO_3400},
+       { PCI_VDEVICE(INTEL, 0x3b14), TCO_3420},
+       { PCI_VDEVICE(INTEL, 0x3b16), TCO_3450},
+       { PCI_VDEVICE(INTEL, 0x5031), TCO_EP80579},
+       { PCI_VDEVICE(INTEL, 0x1c41), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c42), TCO_CPTD},
+       { PCI_VDEVICE(INTEL, 0x1c43), TCO_CPTM},
+       { PCI_VDEVICE(INTEL, 0x1c44), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c45), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c46), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c47), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c48), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c49), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c4a), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c4b), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c4c), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c4d), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c4e), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c4f), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c50), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c51), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c52), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c53), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c54), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c55), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c56), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c57), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c58), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c59), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c5a), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c5b), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c5c), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c5d), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c5e), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1c5f), TCO_CPT},
+       { PCI_VDEVICE(INTEL, 0x1d40), TCO_PBG},
+       { PCI_VDEVICE(INTEL, 0x1d41), TCO_PBG},
+       { PCI_VDEVICE(INTEL, 0x2310), TCO_DH89XXCC},
+       { PCI_VDEVICE(INTEL, 0x1e40), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e41), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e42), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e43), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e44), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e45), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e46), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e47), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e48), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e49), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e4a), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e4b), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e4c), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e4d), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e4e), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e4f), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e50), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e51), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e52), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e53), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e54), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e55), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e56), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e57), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e58), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e59), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e5a), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e5b), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e5c), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e5d), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e5e), TCO_PPT},
+       { PCI_VDEVICE(INTEL, 0x1e5f), TCO_PPT},
        { 0, },                 /* End of list */
 };
 MODULE_DEVICE_TABLE(pci, iTCO_wdt_pci_tbl);
@@ -1052,15 +923,10 @@ static void iTCO_wdt_shutdown(struct platform_device *dev)
        iTCO_wdt_stop();
 }
 
-#define iTCO_wdt_suspend NULL
-#define iTCO_wdt_resume  NULL
-
 static struct platform_driver iTCO_wdt_driver = {
        .probe          = iTCO_wdt_probe,
        .remove         = __devexit_p(iTCO_wdt_remove),
        .shutdown       = iTCO_wdt_shutdown,
-       .suspend        = iTCO_wdt_suspend,
-       .resume         = iTCO_wdt_resume,
        .driver         = {
                .owner  = THIS_MODULE,
                .name   = DRV_NAME,
index 86f7cac..b8ef2c6 100644 (file)
@@ -329,12 +329,18 @@ static void imx2_wdt_shutdown(struct platform_device *pdev)
        }
 }
 
+static const struct of_device_id imx2_wdt_dt_ids[] = {
+       { .compatible = "fsl,imx21-wdt", },
+       { /* sentinel */ }
+};
+
 static struct platform_driver imx2_wdt_driver = {
        .remove         = __exit_p(imx2_wdt_remove),
        .shutdown       = imx2_wdt_shutdown,
        .driver         = {
                .name   = DRIVER_NAME,
                .owner  = THIS_MODULE,
+               .of_match_table = imx2_wdt_dt_ids,
        },
 };
 
index 6143f52..8d2d850 100644 (file)
 #include <linux/notifier.h>
 #include <linux/reboot.h>
 #include <linux/fs.h>
-#include <linux/pci.h>
 #include <linux/spinlock.h>
 #include <linux/uaccess.h>
 #include <linux/io.h>
+#include <linux/ioport.h>
 
 #define NAME "it8712f_wdt"
 
@@ -51,7 +51,6 @@ MODULE_PARM_DESC(nowayout, "Disable watchdog shutdown on close");
 
 static unsigned long wdt_open;
 static unsigned expect_close;
-static spinlock_t io_lock;
 static unsigned char revision;
 
 /* Dog Food address - We use the game port address */
@@ -121,20 +120,26 @@ static inline void superio_select(int ldn)
        outb(ldn, VAL);
 }
 
-static inline void superio_enter(void)
+static inline int superio_enter(void)
 {
-       spin_lock(&io_lock);
+       /*
+        * Try to reserve REG and REG + 1 for exclusive access.
+        */
+       if (!request_muxed_region(REG, 2, NAME))
+               return -EBUSY;
+
        outb(0x87, REG);
        outb(0x01, REG);
        outb(0x55, REG);
        outb(0x55, REG);
+       return 0;
 }
 
 static inline void superio_exit(void)
 {
        outb(0x02, REG);
        outb(0x02, VAL);
-       spin_unlock(&io_lock);
+       release_region(REG, 2);
 }
 
 static inline void it8712f_wdt_ping(void)
@@ -173,10 +178,13 @@ static int it8712f_wdt_get_status(void)
                return 0;
 }
 
-static void it8712f_wdt_enable(void)
+static int it8712f_wdt_enable(void)
 {
+       int ret = superio_enter();
+       if (ret)
+               return ret;
+
        printk(KERN_DEBUG NAME ": enabling watchdog timer\n");
-       superio_enter();
        superio_select(LDN_GPIO);
 
        superio_outb(wdt_control_reg, WDT_CONTROL);
@@ -186,13 +194,17 @@ static void it8712f_wdt_enable(void)
        superio_exit();
 
        it8712f_wdt_ping();
+
+       return 0;
 }
 
-static void it8712f_wdt_disable(void)
+static int it8712f_wdt_disable(void)
 {
-       printk(KERN_DEBUG NAME ": disabling watchdog timer\n");
+       int ret = superio_enter();
+       if (ret)
+               return ret;
 
-       superio_enter();
+       printk(KERN_DEBUG NAME ": disabling watchdog timer\n");
        superio_select(LDN_GPIO);
 
        superio_outb(0, WDT_CONFIG);
@@ -202,6 +214,7 @@ static void it8712f_wdt_disable(void)
        superio_outb(0, WDT_TIMEOUT);
 
        superio_exit();
+       return 0;
 }
 
 static int it8712f_wdt_notify(struct notifier_block *this,
@@ -252,6 +265,7 @@ static long it8712f_wdt_ioctl(struct file *file, unsigned int cmd,
                                                WDIOF_MAGICCLOSE,
        };
        int value;
+       int ret;
 
        switch (cmd) {
        case WDIOC_GETSUPPORT:
@@ -259,7 +273,9 @@ static long it8712f_wdt_ioctl(struct file *file, unsigned int cmd,
                        return -EFAULT;
                return 0;
        case WDIOC_GETSTATUS:
-               superio_enter();
+               ret = superio_enter();
+               if (ret)
+                       return ret;
                superio_select(LDN_GPIO);
 
                value = it8712f_wdt_get_status();
@@ -280,7 +296,9 @@ static long it8712f_wdt_ioctl(struct file *file, unsigned int cmd,
                if (value > (max_units * 60))
                        return -EINVAL;
                margin = value;
-               superio_enter();
+               ret = superio_enter();
+               if (ret)
+                       return ret;
                superio_select(LDN_GPIO);
 
                it8712f_wdt_update_margin();
@@ -299,10 +317,14 @@ static long it8712f_wdt_ioctl(struct file *file, unsigned int cmd,
 
 static int it8712f_wdt_open(struct inode *inode, struct file *file)
 {
+       int ret;
        /* only allow one at a time */
        if (test_and_set_bit(0, &wdt_open))
                return -EBUSY;
-       it8712f_wdt_enable();
+
+       ret = it8712f_wdt_enable();
+       if (ret)
+               return ret;
        return nonseekable_open(inode, file);
 }
 
@@ -313,7 +335,8 @@ static int it8712f_wdt_release(struct inode *inode, struct file *file)
                        ": watchdog device closed unexpectedly, will not"
                        " disable the watchdog timer\n");
        } else if (!nowayout) {
-               it8712f_wdt_disable();
+               if (it8712f_wdt_disable())
+                       printk(KERN_WARNING NAME "Watchdog disable failed\n");
        }
        expect_close = 0;
        clear_bit(0, &wdt_open);
@@ -340,8 +363,10 @@ static int __init it8712f_wdt_find(unsigned short *address)
 {
        int err = -ENODEV;
        int chip_type;
+       int ret = superio_enter();
+       if (ret)
+               return ret;
 
-       superio_enter();
        chip_type = superio_inw(DEVID);
        if (chip_type != IT8712F_DEVID)
                goto exit;
@@ -382,8 +407,6 @@ static int __init it8712f_wdt_init(void)
 {
        int err = 0;
 
-       spin_lock_init(&io_lock);
-
        if (it8712f_wdt_find(&address))
                return -ENODEV;
 
@@ -392,7 +415,11 @@ static int __init it8712f_wdt_init(void)
                return -EBUSY;
        }
 
-       it8712f_wdt_disable();
+       err = it8712f_wdt_disable();
+       if (err) {
+               printk(KERN_ERR NAME ": unable to disable watchdog timer.\n");
+               goto out;
+       }
 
        err = register_reboot_notifier(&it8712f_wdt_notifier);
        if (err) {
index b1bc72f..a2d9a12 100644 (file)
 
 static unsigned int base, gpact, ciract, max_units, chip_type;
 static unsigned long wdt_status;
-static DEFINE_SPINLOCK(spinlock);
 
 static int nogameport = DEFAULT_NOGAMEPORT;
 static int exclusive  = DEFAULT_EXCLUSIVE;
@@ -163,18 +162,26 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started, default="
 
 /* Superio Chip */
 
-static inline void superio_enter(void)
+static inline int superio_enter(void)
 {
+       /*
+        * Try to reserve REG and REG + 1 for exclusive access.
+        */
+       if (!request_muxed_region(REG, 2, WATCHDOG_NAME))
+               return -EBUSY;
+
        outb(0x87, REG);
        outb(0x01, REG);
        outb(0x55, REG);
        outb(0x55, REG);
+       return 0;
 }
 
 static inline void superio_exit(void)
 {
        outb(0x02, REG);
        outb(0x02, VAL);
+       release_region(REG, 2);
 }
 
 static inline void superio_select(int ldn)
@@ -255,12 +262,11 @@ static void wdt_keepalive(void)
        set_bit(WDTS_KEEPALIVE, &wdt_status);
 }
 
-static void wdt_start(void)
+static int wdt_start(void)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(&spinlock, flags);
-       superio_enter();
+       int ret = superio_enter();
+       if (ret)
+               return ret;
 
        superio_select(GPIO);
        if (test_bit(WDTS_USE_GP, &wdt_status))
@@ -270,15 +276,15 @@ static void wdt_start(void)
        wdt_update_timeout();
 
        superio_exit();
-       spin_unlock_irqrestore(&spinlock, flags);
+
+       return 0;
 }
 
-static void wdt_stop(void)
+static int wdt_stop(void)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(&spinlock, flags);
-       superio_enter();
+       int ret = superio_enter();
+       if (ret)
+               return ret;
 
        superio_select(GPIO);
        superio_outb(0x00, WDTCTRL);
@@ -288,7 +294,7 @@ static void wdt_stop(void)
                superio_outb(0x00, WDTVALMSB);
 
        superio_exit();
-       spin_unlock_irqrestore(&spinlock, flags);
+       return 0;
 }
 
 /**
@@ -303,8 +309,6 @@ static void wdt_stop(void)
 
 static int wdt_set_timeout(int t)
 {
-       unsigned long flags;
-
        if (t < 1 || t > max_units * 60)
                return -EINVAL;
 
@@ -313,14 +317,15 @@ static int wdt_set_timeout(int t)
        else
                timeout = t;
 
-       spin_lock_irqsave(&spinlock, flags);
        if (test_bit(WDTS_TIMER_RUN, &wdt_status)) {
-               superio_enter();
+               int ret = superio_enter();
+               if (ret)
+                       return ret;
+
                superio_select(GPIO);
                wdt_update_timeout();
                superio_exit();
        }
-       spin_unlock_irqrestore(&spinlock, flags);
        return 0;
 }
 
@@ -339,12 +344,12 @@ static int wdt_set_timeout(int t)
 
 static int wdt_get_status(int *status)
 {
-       unsigned long flags;
-
        *status = 0;
        if (testmode) {
-               spin_lock_irqsave(&spinlock, flags);
-               superio_enter();
+               int ret = superio_enter();
+               if (ret)
+                       return ret;
+
                superio_select(GPIO);
                if (superio_inb(WDTCTRL) & WDT_ZERO) {
                        superio_outb(0x00, WDTCTRL);
@@ -353,7 +358,6 @@ static int wdt_get_status(int *status)
                }
 
                superio_exit();
-               spin_unlock_irqrestore(&spinlock, flags);
        }
        if (test_and_clear_bit(WDTS_KEEPALIVE, &wdt_status))
                *status |= WDIOF_KEEPALIVEPING;
@@ -379,9 +383,17 @@ static int wdt_open(struct inode *inode, struct file *file)
        if (exclusive && test_and_set_bit(WDTS_DEV_OPEN, &wdt_status))
                return -EBUSY;
        if (!test_and_set_bit(WDTS_TIMER_RUN, &wdt_status)) {
+               int ret;
                if (nowayout && !test_and_set_bit(WDTS_LOCKED, &wdt_status))
                        __module_get(THIS_MODULE);
-               wdt_start();
+
+               ret = wdt_start();
+               if (ret) {
+                       clear_bit(WDTS_LOCKED, &wdt_status);
+                       clear_bit(WDTS_TIMER_RUN, &wdt_status);
+                       clear_bit(WDTS_DEV_OPEN, &wdt_status);
+                       return ret;
+               }
        }
        return nonseekable_open(inode, file);
 }
@@ -403,7 +415,16 @@ static int wdt_release(struct inode *inode, struct file *file)
 {
        if (test_bit(WDTS_TIMER_RUN, &wdt_status)) {
                if (test_and_clear_bit(WDTS_EXPECTED, &wdt_status)) {
-                       wdt_stop();
+                       int ret = wdt_stop();
+                       if (ret) {
+                               /*
+                                * Stop failed. Just keep the watchdog alive
+                                * and hope nothing bad happens.
+                                */
+                               set_bit(WDTS_EXPECTED, &wdt_status);
+                               wdt_keepalive();
+                               return ret;
+                       }
                        clear_bit(WDTS_TIMER_RUN, &wdt_status);
                } else {
                        wdt_keepalive();
@@ -484,7 +505,9 @@ static long wdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                                    &ident, sizeof(ident)) ? -EFAULT : 0;
 
        case WDIOC_GETSTATUS:
-               wdt_get_status(&status);
+               rc = wdt_get_status(&status);
+               if (rc)
+                       return rc;
                return put_user(status, uarg.i);
 
        case WDIOC_GETBOOTSTATUS:
@@ -500,14 +523,22 @@ static long wdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
                switch (new_options) {
                case WDIOS_DISABLECARD:
-                       if (test_bit(WDTS_TIMER_RUN, &wdt_status))
-                               wdt_stop();
+                       if (test_bit(WDTS_TIMER_RUN, &wdt_status)) {
+                               rc = wdt_stop();
+                               if (rc)
+                                       return rc;
+                       }
                        clear_bit(WDTS_TIMER_RUN, &wdt_status);
                        return 0;
 
                case WDIOS_ENABLECARD:
-                       if (!test_and_set_bit(WDTS_TIMER_RUN, &wdt_status))
-                               wdt_start();
+                       if (!test_and_set_bit(WDTS_TIMER_RUN, &wdt_status)) {
+                               rc = wdt_start();
+                               if (rc) {
+                                       clear_bit(WDTS_TIMER_RUN, &wdt_status);
+                                       return rc;
+                               }
+                       }
                        return 0;
 
                default:
@@ -560,16 +591,17 @@ static int __init it87_wdt_init(void)
        int rc = 0;
        int try_gameport = !nogameport;
        u8  chip_rev;
-       unsigned long flags;
+       int gp_rreq_fail = 0;
 
        wdt_status = 0;
 
-       spin_lock_irqsave(&spinlock, flags);
-       superio_enter();
+       rc = superio_enter();
+       if (rc)
+               return rc;
+
        chip_type = superio_inw(CHIPID);
        chip_rev  = superio_inb(CHIPREV) & 0x0f;
        superio_exit();
-       spin_unlock_irqrestore(&spinlock, flags);
 
        switch (chip_type) {
        case IT8702_ID:
@@ -603,8 +635,9 @@ static int __init it87_wdt_init(void)
                return -ENODEV;
        }
 
-       spin_lock_irqsave(&spinlock, flags);
-       superio_enter();
+       rc = superio_enter();
+       if (rc)
+               return rc;
 
        superio_select(GPIO);
        superio_outb(WDT_TOV1, WDTCFG);
@@ -620,21 +653,16 @@ static int __init it87_wdt_init(void)
                }
                gpact = superio_inb(ACTREG);
                superio_outb(0x01, ACTREG);
-               superio_exit();
-               spin_unlock_irqrestore(&spinlock, flags);
                if (request_region(base, 1, WATCHDOG_NAME))
                        set_bit(WDTS_USE_GP, &wdt_status);
                else
-                       rc = -EIO;
-       } else {
-               superio_exit();
-               spin_unlock_irqrestore(&spinlock, flags);
+                       gp_rreq_fail = 1;
        }
 
        /* If we haven't Gameport support, try to get CIR support */
        if (!test_bit(WDTS_USE_GP, &wdt_status)) {
                if (!request_region(CIR_BASE, 8, WATCHDOG_NAME)) {
-                       if (rc == -EIO)
+                       if (gp_rreq_fail)
                                printk(KERN_ERR PFX
                                        "I/O Address 0x%04x and 0x%04x"
                                        " already in use\n", base, CIR_BASE);
@@ -646,21 +674,16 @@ static int __init it87_wdt_init(void)
                        goto err_out;
                }
                base = CIR_BASE;
-               spin_lock_irqsave(&spinlock, flags);
-               superio_enter();
 
                superio_select(CIR);
                superio_outw(base, BASEREG);
                superio_outb(0x00, CIR_ILS);
                ciract = superio_inb(ACTREG);
                superio_outb(0x01, ACTREG);
-               if (rc == -EIO) {
+               if (gp_rreq_fail) {
                        superio_select(GAMEPORT);
                        superio_outb(gpact, ACTREG);
                }
-
-               superio_exit();
-               spin_unlock_irqrestore(&spinlock, flags);
        }
 
        if (timeout < 1 || timeout > max_units * 60) {
@@ -704,6 +727,7 @@ static int __init it87_wdt_init(void)
                "nogameport=%d)\n", chip_type, chip_rev, timeout,
                nowayout, testmode, exclusive, nogameport);
 
+       superio_exit();
        return 0;
 
 err_out_reboot:
@@ -711,49 +735,37 @@ err_out_reboot:
 err_out_region:
        release_region(base, test_bit(WDTS_USE_GP, &wdt_status) ? 1 : 8);
        if (!test_bit(WDTS_USE_GP, &wdt_status)) {
-               spin_lock_irqsave(&spinlock, flags);
-               superio_enter();
                superio_select(CIR);
                superio_outb(ciract, ACTREG);
-               superio_exit();
-               spin_unlock_irqrestore(&spinlock, flags);
        }
 err_out:
        if (try_gameport) {
-               spin_lock_irqsave(&spinlock, flags);
-               superio_enter();
                superio_select(GAMEPORT);
                superio_outb(gpact, ACTREG);
-               superio_exit();
-               spin_unlock_irqrestore(&spinlock, flags);
        }
 
+       superio_exit();
        return rc;
 }
 
 static void __exit it87_wdt_exit(void)
 {
-       unsigned long flags;
-       int nolock;
-
-       nolock = !spin_trylock_irqsave(&spinlock, flags);
-       superio_enter();
-       superio_select(GPIO);
-       superio_outb(0x00, WDTCTRL);
-       superio_outb(0x00, WDTCFG);
-       superio_outb(0x00, WDTVALLSB);
-       if (max_units > 255)
-               superio_outb(0x00, WDTVALMSB);
-       if (test_bit(WDTS_USE_GP, &wdt_status)) {
-               superio_select(GAMEPORT);
-               superio_outb(gpact, ACTREG);
-       } else {
-               superio_select(CIR);
-               superio_outb(ciract, ACTREG);
+       if (superio_enter() == 0) {
+               superio_select(GPIO);
+               superio_outb(0x00, WDTCTRL);
+               superio_outb(0x00, WDTCFG);
+               superio_outb(0x00, WDTVALLSB);
+               if (max_units > 255)
+                       superio_outb(0x00, WDTVALMSB);
+               if (test_bit(WDTS_USE_GP, &wdt_status)) {
+                       superio_select(GAMEPORT);
+                       superio_outb(gpact, ACTREG);
+               } else {
+                       superio_select(CIR);
+                       superio_outb(ciract, ACTREG);
+               }
+               superio_exit();
        }
-       superio_exit();
-       if (!nolock)
-               spin_unlock_irqrestore(&spinlock, flags);
 
        misc_deregister(&wdt_miscdev);
        unregister_reboot_notifier(&wdt_notifier);
index 2b4af22..4dc3102 100644 (file)
@@ -407,12 +407,35 @@ static int __devexit mpcore_wdt_remove(struct platform_device *dev)
        return 0;
 }
 
#ifdef CONFIG_PM
/*
 * Suspend hook: unconditionally stop the watchdog so it cannot expire
 * (and reset the system) while the platform is asleep.
 */
static int mpcore_wdt_suspend(struct platform_device *dev, pm_message_t msg)
{
	struct mpcore_wdt *wdt = platform_get_drvdata(dev);
	mpcore_wdt_stop(wdt);		/* Turn the WDT off */
	return 0;
}

/*
 * Resume hook: restart the watchdog only if userspace had it running
 * before suspend (timer_alive bit 0 tracks the open/armed state).
 */
static int mpcore_wdt_resume(struct platform_device *dev)
{
	struct mpcore_wdt *wdt = platform_get_drvdata(dev);
	/* re-activate timer */
	if (test_bit(0, &wdt->timer_alive))
		mpcore_wdt_start(wdt);
	return 0;
}
#else
/* No power management: register no PM callbacks. */
#define mpcore_wdt_suspend	NULL
#define mpcore_wdt_resume	NULL
#endif
+
 /* work with hotplug and coldplug */
 MODULE_ALIAS("platform:mpcore_wdt");
 
 static struct platform_driver mpcore_wdt_driver = {
        .probe          = mpcore_wdt_probe,
        .remove         = __devexit_p(mpcore_wdt_remove),
+       .suspend        = mpcore_wdt_suspend,
+       .resume         = mpcore_wdt_resume,
        .shutdown       = mpcore_wdt_shutdown,
        .driver         = {
                .owner  = THIS_MODULE,
index 0430e09..ac37bb8 100644 (file)
@@ -225,11 +225,11 @@ static int __devinit mtx1_wdt_probe(struct platform_device *pdev)
 
        ret = misc_register(&mtx1_wdt_misc);
        if (ret < 0) {
-               printk(KERN_ERR " mtx-1_wdt : failed to register\n");
+               dev_err(&pdev->dev, "failed to register\n");
                return ret;
        }
        mtx1_wdt_start();
-       printk(KERN_INFO "MTX-1 Watchdog driver\n");
+       dev_info(&pdev->dev, "MTX-1 Watchdog driver\n");
        return 0;
 }
 
diff --git a/drivers/watchdog/of_xilinx_wdt.c b/drivers/watchdog/of_xilinx_wdt.c
new file mode 100644 (file)
index 0000000..4ec741a
--- /dev/null
@@ -0,0 +1,433 @@
+/*
+*   of_xilinx_wdt.c  1.01  A Watchdog Device Driver for Xilinx xps_timebase_wdt
+*
+*   (C) Copyright 2011 (Alejandro Cabrera <aldaya@gmail.com>)
+*
+*       -----------------------
+*
+*   This program is free software; you can redistribute it and/or
+*   modify it under the terms of the GNU General Public License
+*   as published by the Free Software Foundation; either version
+*   2 of the License, or (at your option) any later version.
+*
+*       -----------------------
+*      30-May-2011 Alejandro Cabrera <aldaya@gmail.com>
+*              - If "xlnx,wdt-enable-once" wasn't found on device tree the
+*                module will use CONFIG_WATCHDOG_NOWAYOUT
+*              - If the device tree parameters ("clock-frequency" and
+*                "xlnx,wdt-interval") wasn't found the driver won't
+*                know the wdt reset interval
+*/
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/watchdog.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_address.h>
+
+/* Register offsets for the Wdt device */
+#define XWT_TWCSR0_OFFSET   0x0 /* Control/Status Register0 */
+#define XWT_TWCSR1_OFFSET   0x4 /* Control/Status Register1 */
+#define XWT_TBR_OFFSET      0x8 /* Timebase Register Offset */
+
+/* Control/Status Register Masks  */
+#define XWT_CSR0_WRS_MASK   0x00000008 /* Reset status */
+#define XWT_CSR0_WDS_MASK   0x00000004 /* Timer state  */
+#define XWT_CSR0_EWDT1_MASK 0x00000002 /* Enable bit 1 */
+
+/* Control/Status Register 0/1 bits  */
+#define XWT_CSRX_EWDT2_MASK 0x00000001 /* Enable bit 2 */
+
+/* SelfTest constants */
+#define XWT_MAX_SELFTEST_LOOP_COUNT 0x00010000
+#define XWT_TIMER_FAILED            0xFFFFFFFF
+
+#define WATCHDOG_NAME     "Xilinx Watchdog"
+#define PFX WATCHDOG_NAME ": "
+
/* Per-device state for the (single) xps_timebase_wdt instance. */
struct xwdt_device {
	struct resource  res;	/* register window obtained from the device tree */
	void __iomem *base;	/* ioremapped base of @res */
	u32 nowayout;		/* non-zero: watchdog may never be stopped */
	u32 wdt_interval;	/* counter width, from "xlnx,wdt-interval" */
	u32 boot_status;	/* WDIOF_* flags sampled at probe time */
};

static struct xwdt_device xdev;
+
/* Module-wide scratch state; a single device instance is assumed. */
static  u32 timeout;			/* computed reset interval in seconds */
static  u32 control_status_reg;		/* last value read from TWCSR0 */
static  u8  expect_close;		/* 42 after 'V' was written (magic close) */
static  u8  no_timeout;			/* set when DT lacks clock/interval info */
static unsigned long driver_open;	/* bit 0: device is open (single-open) */

/* Serializes all register accesses below. */
static  DEFINE_SPINLOCK(spinlock);
+
+static void xwdt_start(void)
+{
+       spin_lock(&spinlock);
+
+       /* Clean previous status and enable the watchdog timer */
+       control_status_reg = ioread32(xdev.base + XWT_TWCSR0_OFFSET);
+       control_status_reg |= (XWT_CSR0_WRS_MASK | XWT_CSR0_WDS_MASK);
+
+       iowrite32((control_status_reg | XWT_CSR0_EWDT1_MASK),
+                               xdev.base + XWT_TWCSR0_OFFSET);
+
+       iowrite32(XWT_CSRX_EWDT2_MASK, xdev.base + XWT_TWCSR1_OFFSET);
+
+       spin_unlock(&spinlock);
+}
+
+static void xwdt_stop(void)
+{
+       spin_lock(&spinlock);
+
+       control_status_reg = ioread32(xdev.base + XWT_TWCSR0_OFFSET);
+
+       iowrite32((control_status_reg & ~XWT_CSR0_EWDT1_MASK),
+                               xdev.base + XWT_TWCSR0_OFFSET);
+
+       iowrite32(0, xdev.base + XWT_TWCSR1_OFFSET);
+
+       spin_unlock(&spinlock);
+       printk(KERN_INFO PFX "Stopped!\n");
+}
+
+static void xwdt_keepalive(void)
+{
+       spin_lock(&spinlock);
+
+       control_status_reg = ioread32(xdev.base + XWT_TWCSR0_OFFSET);
+       control_status_reg |= (XWT_CSR0_WRS_MASK | XWT_CSR0_WDS_MASK);
+       iowrite32(control_status_reg, xdev.base + XWT_TWCSR0_OFFSET);
+
+       spin_unlock(&spinlock);
+}
+
/*
 * Report watchdog status flags into *status.
 *
 * Sets WDIOF_CARDRESET when either the reset-status (WRS) or the
 * timer-state (WDS) bit is set in TWCSR0.
 *
 * NOTE(review): WDS is documented above as "Timer state", not a reset
 * indicator, so folding it into WDIOF_CARDRESET looks questionable —
 * confirm against the xps_timebase_wdt datasheet before relying on it.
 */
static void xwdt_get_status(int *status)
{
	int new_status;

	spin_lock(&spinlock);

	control_status_reg = ioread32(xdev.base + XWT_TWCSR0_OFFSET);
	new_status = ((control_status_reg &
			(XWT_CSR0_WRS_MASK | XWT_CSR0_WDS_MASK)) != 0);
	spin_unlock(&spinlock);

	*status = 0;
	if (new_status & 1)
		*status |= WDIOF_CARDRESET;
}
+
+static u32 xwdt_selftest(void)
+{
+       int i;
+       u32 timer_value1;
+       u32 timer_value2;
+
+       spin_lock(&spinlock);
+
+       timer_value1 = ioread32(xdev.base + XWT_TBR_OFFSET);
+       timer_value2 = ioread32(xdev.base + XWT_TBR_OFFSET);
+
+       for (i = 0;
+               ((i <= XWT_MAX_SELFTEST_LOOP_COUNT) &&
+                       (timer_value2 == timer_value1)); i++) {
+               timer_value2 = ioread32(xdev.base + XWT_TBR_OFFSET);
+       }
+
+       spin_unlock(&spinlock);
+
+       if (timer_value2 != timer_value1)
+               return ~XWT_TIMER_FAILED;
+       else
+               return XWT_TIMER_FAILED;
+}
+
+static int xwdt_open(struct inode *inode, struct file *file)
+{
+       /* Only one process can handle the wdt at a time */
+       if (test_and_set_bit(0, &driver_open))
+               return -EBUSY;
+
+       /* Make sure that the module are always loaded...*/
+       if (xdev.nowayout)
+               __module_get(THIS_MODULE);
+
+       xwdt_start();
+       printk(KERN_INFO PFX "Started...\n");
+
+       return nonseekable_open(inode, file);
+}
+
+static int xwdt_release(struct inode *inode, struct file *file)
+{
+       if (expect_close == 42) {
+               xwdt_stop();
+       } else {
+               printk(KERN_CRIT PFX
+                       "Unexpected close, not stopping watchdog!\n");
+               xwdt_keepalive();
+       }
+
+       clear_bit(0, &driver_open);
+       expect_close = 0;
+       return 0;
+}
+
+/*
+ *      xwdt_write:
+ *      @file: file handle to the watchdog
+ *      @buf: buffer to write (unused as data does not matter here
+ *      @count: count of bytes
+ *      @ppos: pointer to the position to write. No seeks allowed
+ *
+ *      A write to a watchdog device is defined as a keepalive signal. Any
+ *      write of data will do, as we don't define content meaning.
+ */
+static ssize_t xwdt_write(struct file *file, const char __user *buf,
+                                               size_t len, loff_t *ppos)
+{
+       if (len) {
+               if (!xdev.nowayout) {
+                       size_t i;
+
+                       /* In case it was set long ago */
+                       expect_close = 0;
+
+                       for (i = 0; i != len; i++) {
+                               char c;
+
+                               if (get_user(c, buf + i))
+                                       return -EFAULT;
+                               if (c == 'V')
+                                       expect_close = 42;
+                       }
+               }
+               xwdt_keepalive();
+       }
+       return len;
+}
+
/* Capabilities advertised to userspace via WDIOC_GETSUPPORT. */
static const struct watchdog_info ident = {
	.options =  WDIOF_MAGICCLOSE |
		    WDIOF_KEEPALIVEPING,
	.firmware_version =	1,
	.identity =	WATCHDOG_NAME,
};
+
+/*
+ *      xwdt_ioctl:
+ *      @file: file handle to the device
+ *      @cmd: watchdog command
+ *      @arg: argument pointer
+ *
+ *      The watchdog API defines a common set of functions for all watchdogs
+ *      according to their available features.
+ */
+static long xwdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+       int status;
+
+       union {
+               struct watchdog_info __user *ident;
+               int __user *i;
+       } uarg;
+
+       uarg.i = (int __user *)arg;
+
+       switch (cmd) {
+       case WDIOC_GETSUPPORT:
+               return copy_to_user(uarg.ident, &ident,
+                                       sizeof(ident)) ? -EFAULT : 0;
+
+       case WDIOC_GETBOOTSTATUS:
+               return put_user(xdev.boot_status, uarg.i);
+
+       case WDIOC_GETSTATUS:
+               xwdt_get_status(&status);
+               return put_user(status, uarg.i);
+
+       case WDIOC_KEEPALIVE:
+               xwdt_keepalive();
+               return 0;
+
+       case WDIOC_GETTIMEOUT:
+               if (no_timeout)
+                       return -ENOTTY;
+               else
+                       return put_user(timeout, uarg.i);
+
+       default:
+               return -ENOTTY;
+       }
+}
+
/* Character-device operations for /dev/watchdog. */
static const struct file_operations xwdt_fops = {
	.owner      = THIS_MODULE,
	.llseek     = no_llseek,
	.write      = xwdt_write,
	.open       = xwdt_open,
	.release    = xwdt_release,
	.unlocked_ioctl = xwdt_ioctl,
};

/* Registers as the standard watchdog misc device (minor 130). */
static struct miscdevice xwdt_miscdev = {
	.minor      = WATCHDOG_MINOR,
	.name       = "watchdog",
	.fops       = &xwdt_fops,
};
+
+static int __devinit xwdt_probe(struct platform_device *pdev)
+{
+       int rc;
+       u32 *tmptr;
+       u32 *pfreq;
+
+       no_timeout = 0;
+
+       pfreq = (u32 *)of_get_property(pdev->dev.of_node->parent,
+                                       "clock-frequency", NULL);
+
+       if (pfreq == NULL) {
+               printk(KERN_WARNING PFX
+                       "The watchdog clock frequency cannot be obtained!\n");
+               no_timeout = 1;
+       }
+
+       rc = of_address_to_resource(pdev->dev.of_node, 0, &xdev.res);
+       if (rc) {
+               printk(KERN_WARNING PFX "invalid address!\n");
+               return rc;
+       }
+
+       tmptr = (u32 *)of_get_property(pdev->dev.of_node,
+                                       "xlnx,wdt-interval", NULL);
+       if (tmptr == NULL) {
+               printk(KERN_WARNING PFX "Parameter \"xlnx,wdt-interval\""
+                                       " not found in device tree!\n");
+               no_timeout = 1;
+       } else {
+               xdev.wdt_interval = *tmptr;
+       }
+
+       tmptr = (u32 *)of_get_property(pdev->dev.of_node,
+                                       "xlnx,wdt-enable-once", NULL);
+       if (tmptr == NULL) {
+               printk(KERN_WARNING PFX "Parameter \"xlnx,wdt-enable-once\""
+                                       " not found in device tree!\n");
+               xdev.nowayout = WATCHDOG_NOWAYOUT;
+       }
+
+/*
+ *  Twice of the 2^wdt_interval / freq  because the first wdt overflow is
+ *  ignored (interrupt), reset is only generated at second wdt overflow
+ */
+       if (!no_timeout)
+               timeout = 2 * ((1<<xdev.wdt_interval) / *pfreq);
+
+       if (!request_mem_region(xdev.res.start,
+                       xdev.res.end - xdev.res.start + 1, WATCHDOG_NAME)) {
+               rc = -ENXIO;
+               printk(KERN_ERR PFX "memory request failure!\n");
+               goto err_out;
+       }
+
+       xdev.base = ioremap(xdev.res.start, xdev.res.end - xdev.res.start + 1);
+       if (xdev.base == NULL) {
+               rc = -ENOMEM;
+               printk(KERN_ERR PFX "ioremap failure!\n");
+               goto release_mem;
+       }
+
+       rc = xwdt_selftest();
+       if (rc == XWT_TIMER_FAILED) {
+               printk(KERN_ERR PFX "SelfTest routine error!\n");
+               goto unmap_io;
+       }
+
+       xwdt_get_status(&xdev.boot_status);
+
+       rc = misc_register(&xwdt_miscdev);
+       if (rc) {
+               printk(KERN_ERR PFX
+                       "cannot register miscdev on minor=%d (err=%d)\n",
+                                               xwdt_miscdev.minor, rc);
+               goto unmap_io;
+       }
+
+       if (no_timeout)
+               printk(KERN_INFO PFX
+                       "driver loaded (timeout=? sec, nowayout=%d)\n",
+                                                   xdev.nowayout);
+       else
+               printk(KERN_INFO PFX
+                       "driver loaded (timeout=%d sec, nowayout=%d)\n",
+                                       timeout, xdev.nowayout);
+
+       expect_close = 0;
+       clear_bit(0, &driver_open);
+
+       return 0;
+
+unmap_io:
+       iounmap(xdev.base);
+release_mem:
+       release_mem_region(xdev.res.start, resource_size(&xdev.res));
+err_out:
+       return rc;
+}
+
+static int __devexit xwdt_remove(struct platform_device *dev)
+{
+       misc_deregister(&xwdt_miscdev);
+       iounmap(xdev.base);
+       release_mem_region(xdev.res.start, resource_size(&xdev.res));
+
+       return 0;
+}
+
/* Match table for of_platform binding */
static struct of_device_id __devinitdata xwdt_of_match[] = {
	{ .compatible = "xlnx,xps-timebase-wdt-1.01.a", },
	{},
};
MODULE_DEVICE_TABLE(of, xwdt_of_match);

/* Platform driver glue; matched against the DT table above. */
static struct platform_driver xwdt_driver = {
	.probe       = xwdt_probe,
	.remove      = __devexit_p(xwdt_remove),
	.driver = {
		.owner = THIS_MODULE,
		.name  = WATCHDOG_NAME,
		.of_match_table = xwdt_of_match,
	},
};
+
/* Module entry point: register the platform driver. */
static int __init xwdt_init(void)
{
	return platform_driver_register(&xwdt_driver);
}

/* Module exit point: unregister the platform driver. */
static void __exit xwdt_exit(void)
{
	platform_driver_unregister(&xwdt_driver);
}

module_init(xwdt_init);
module_exit(xwdt_exit);

MODULE_AUTHOR("Alejandro Cabrera <aldaya@gmail.com>");
MODULE_DESCRIPTION("Xilinx Watchdog driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
index b7c1390..e78d899 100644 (file)
@@ -56,6 +56,7 @@
 #define IO_DEFAULT     0x2E            /* Address used on Portwell Boards */
 
 static int io = IO_DEFAULT;
+static int swc_base_addr = -1;
 
 static int timeout = DEFAULT_TIMEOUT;  /* timeout value */
 static unsigned long timer_enabled;    /* is the timer enabled? */
@@ -116,9 +117,8 @@ static inline void pc87413_enable_swc(void)
 
 /* Read SWC I/O base address */
 
-static inline unsigned int pc87413_get_swc_base(void)
+static void pc87413_get_swc_base_addr(void)
 {
-       unsigned int  swc_base_addr = 0;
        unsigned char addr_l, addr_h = 0;
 
        /* Step 3: Read SWC I/O Base Address */
@@ -136,12 +136,11 @@ static inline unsigned int pc87413_get_swc_base(void)
                "Read SWC I/O Base Address: low %d, high %d, res %d\n",
                                                addr_l, addr_h, swc_base_addr);
 #endif
-       return swc_base_addr;
 }
 
 /* Select Bank 3 of SWC */
 
-static inline void pc87413_swc_bank3(unsigned int swc_base_addr)
+static inline void pc87413_swc_bank3(void)
 {
        /* Step 4: Select Bank3 of SWC */
        outb_p(inb(swc_base_addr + 0x0f) | 0x03, swc_base_addr + 0x0f);
@@ -152,8 +151,7 @@ static inline void pc87413_swc_bank3(unsigned int swc_base_addr)
 
 /* Set watchdog timeout to x minutes */
 
-static inline void pc87413_programm_wdto(unsigned int swc_base_addr,
-                                        char pc87413_time)
+static inline void pc87413_programm_wdto(char pc87413_time)
 {
        /* Step 5: Programm WDTO, Twd. */
        outb_p(pc87413_time, swc_base_addr + WDTO);
@@ -164,7 +162,7 @@ static inline void pc87413_programm_wdto(unsigned int swc_base_addr,
 
 /* Enable WDEN */
 
-static inline void pc87413_enable_wden(unsigned int swc_base_addr)
+static inline void pc87413_enable_wden(void)
 {
        /* Step 6: Enable WDEN */
        outb_p(inb(swc_base_addr + WDCTL) | 0x01, swc_base_addr + WDCTL);
@@ -174,7 +172,7 @@ static inline void pc87413_enable_wden(unsigned int swc_base_addr)
 }
 
 /* Enable SW_WD_TREN */
-static inline void pc87413_enable_sw_wd_tren(unsigned int swc_base_addr)
+static inline void pc87413_enable_sw_wd_tren(void)
 {
        /* Enable SW_WD_TREN */
        outb_p(inb(swc_base_addr + WDCFG) | 0x80, swc_base_addr + WDCFG);
@@ -185,7 +183,7 @@ static inline void pc87413_enable_sw_wd_tren(unsigned int swc_base_addr)
 
 /* Disable SW_WD_TREN */
 
-static inline void pc87413_disable_sw_wd_tren(unsigned int swc_base_addr)
+static inline void pc87413_disable_sw_wd_tren(void)
 {
        /* Disable SW_WD_TREN */
        outb_p(inb(swc_base_addr + WDCFG) & 0x7f, swc_base_addr + WDCFG);
@@ -196,7 +194,7 @@ static inline void pc87413_disable_sw_wd_tren(unsigned int swc_base_addr)
 
 /* Enable SW_WD_TRG */
 
-static inline void pc87413_enable_sw_wd_trg(unsigned int swc_base_addr)
+static inline void pc87413_enable_sw_wd_trg(void)
 {
        /* Enable SW_WD_TRG */
        outb_p(inb(swc_base_addr + WDCTL) | 0x80, swc_base_addr + WDCTL);
@@ -207,7 +205,7 @@ static inline void pc87413_enable_sw_wd_trg(unsigned int swc_base_addr)
 
 /* Disable SW_WD_TRG */
 
-static inline void pc87413_disable_sw_wd_trg(unsigned int swc_base_addr)
+static inline void pc87413_disable_sw_wd_trg(void)
 {
        /* Disable SW_WD_TRG */
        outb_p(inb(swc_base_addr + WDCTL) & 0x7f, swc_base_addr + WDCTL);
@@ -222,18 +220,13 @@ static inline void pc87413_disable_sw_wd_trg(unsigned int swc_base_addr)
 
 static void pc87413_enable(void)
 {
-       unsigned int swc_base_addr;
-
        spin_lock(&io_lock);
 
-       pc87413_select_wdt_out();
-       pc87413_enable_swc();
-       swc_base_addr = pc87413_get_swc_base();
-       pc87413_swc_bank3(swc_base_addr);
-       pc87413_programm_wdto(swc_base_addr, timeout);
-       pc87413_enable_wden(swc_base_addr);
-       pc87413_enable_sw_wd_tren(swc_base_addr);
-       pc87413_enable_sw_wd_trg(swc_base_addr);
+       pc87413_swc_bank3();
+       pc87413_programm_wdto(timeout);
+       pc87413_enable_wden();
+       pc87413_enable_sw_wd_tren();
+       pc87413_enable_sw_wd_trg();
 
        spin_unlock(&io_lock);
 }
@@ -242,17 +235,12 @@ static void pc87413_enable(void)
 
 static void pc87413_disable(void)
 {
-       unsigned int swc_base_addr;
-
        spin_lock(&io_lock);
 
-       pc87413_select_wdt_out();
-       pc87413_enable_swc();
-       swc_base_addr = pc87413_get_swc_base();
-       pc87413_swc_bank3(swc_base_addr);
-       pc87413_disable_sw_wd_tren(swc_base_addr);
-       pc87413_disable_sw_wd_trg(swc_base_addr);
-       pc87413_programm_wdto(swc_base_addr, 0);
+       pc87413_swc_bank3();
+       pc87413_disable_sw_wd_tren();
+       pc87413_disable_sw_wd_trg();
+       pc87413_programm_wdto(0);
 
        spin_unlock(&io_lock);
 }
@@ -261,20 +249,15 @@ static void pc87413_disable(void)
 
 static void pc87413_refresh(void)
 {
-       unsigned int swc_base_addr;
-
        spin_lock(&io_lock);
 
-       pc87413_select_wdt_out();
-       pc87413_enable_swc();
-       swc_base_addr = pc87413_get_swc_base();
-       pc87413_swc_bank3(swc_base_addr);
-       pc87413_disable_sw_wd_tren(swc_base_addr);
-       pc87413_disable_sw_wd_trg(swc_base_addr);
-       pc87413_programm_wdto(swc_base_addr, timeout);
-       pc87413_enable_wden(swc_base_addr);
-       pc87413_enable_sw_wd_tren(swc_base_addr);
-       pc87413_enable_sw_wd_trg(swc_base_addr);
+       pc87413_swc_bank3();
+       pc87413_disable_sw_wd_tren();
+       pc87413_disable_sw_wd_trg();
+       pc87413_programm_wdto(timeout);
+       pc87413_enable_wden();
+       pc87413_enable_sw_wd_tren();
+       pc87413_enable_sw_wd_trg();
 
        spin_unlock(&io_lock);
 }
@@ -528,7 +511,8 @@ static int __init pc87413_init(void)
        printk(KERN_INFO PFX "Version " VERSION " at io 0x%X\n",
                                                        WDT_INDEX_IO_PORT);
 
-       /* request_region(io, 2, "pc87413"); */
+       if (!request_muxed_region(io, 2, MODNAME))
+               return -EBUSY;
 
        ret = register_reboot_notifier(&pc87413_notifier);
        if (ret != 0) {
@@ -541,12 +525,32 @@ static int __init pc87413_init(void)
                printk(KERN_ERR PFX
                        "cannot register miscdev on minor=%d (err=%d)\n",
                        WATCHDOG_MINOR, ret);
-               unregister_reboot_notifier(&pc87413_notifier);
-               return ret;
+               goto reboot_unreg;
        }
        printk(KERN_INFO PFX "initialized. timeout=%d min \n", timeout);
+
+       pc87413_select_wdt_out();
+       pc87413_enable_swc();
+       pc87413_get_swc_base_addr();
+
+       if (!request_region(swc_base_addr, 0x20, MODNAME)) {
+               printk(KERN_ERR PFX
+                       "cannot request SWC region at 0x%x\n", swc_base_addr);
+               ret = -EBUSY;
+               goto misc_unreg;
+       }
+
        pc87413_enable();
+
+       release_region(io, 2);
        return 0;
+
+misc_unreg:
+       misc_deregister(&pc87413_miscdev);
+reboot_unreg:
+       unregister_reboot_notifier(&pc87413_notifier);
+       release_region(io, 2);
+       return ret;
 }
 
 /**
@@ -569,7 +573,7 @@ static void __exit pc87413_exit(void)
 
        misc_deregister(&pc87413_miscdev);
        unregister_reboot_notifier(&pc87413_notifier);
-       /* release_region(io, 2); */
+       release_region(swc_base_addr, 0x20);
 
        printk(KERN_INFO MODNAME " watchdog component driver removed.\n");
 }
index f7f5aa0..30da88f 100644 (file)
@@ -589,6 +589,15 @@ static int s3c2410wdt_resume(struct platform_device *dev)
 #define s3c2410wdt_resume  NULL
 #endif /* CONFIG_PM */
 
+#ifdef CONFIG_OF
+static const struct of_device_id s3c2410_wdt_match[] = {
+       { .compatible = "samsung,s3c2410-wdt" },
+       {},
+};
+MODULE_DEVICE_TABLE(of, s3c2410_wdt_match);
+#else
+#define s3c2410_wdt_match NULL
+#endif
 
 static struct platform_driver s3c2410wdt_driver = {
        .probe          = s3c2410wdt_probe,
@@ -599,6 +608,7 @@ static struct platform_driver s3c2410wdt_driver = {
        .driver         = {
                .owner  = THIS_MODULE,
                .name   = "s3c2410-wdt",
+               .of_match_table = s3c2410_wdt_match,
        },
 };
 
index c7cf4b0..029467e 100644 (file)
@@ -472,15 +472,10 @@ static void sch311x_wdt_shutdown(struct platform_device *dev)
        sch311x_wdt_stop();
 }
 
-#define sch311x_wdt_suspend NULL
-#define sch311x_wdt_resume  NULL
-
 static struct platform_driver sch311x_wdt_driver = {
        .probe          = sch311x_wdt_probe,
        .remove         = __devexit_p(sch311x_wdt_remove),
        .shutdown       = sch311x_wdt_shutdown,
-       .suspend        = sch311x_wdt_suspend,
-       .resume         = sch311x_wdt_resume,
        .driver         = {
                .owner = THIS_MODULE,
                .name = DRV_NAME,
index 0d80e08..cc2cfbe 100644 (file)
@@ -134,6 +134,8 @@ static void wdt_enable(void)
        writel(INT_ENABLE | RESET_ENABLE, wdt->base + WDTCONTROL);
        writel(LOCK, wdt->base + WDTLOCK);
 
+       /* Flush posted writes. */
+       readl(wdt->base + WDTLOCK);
        spin_unlock(&wdt->lock);
 }
 
@@ -144,9 +146,10 @@ static void wdt_disable(void)
 
        writel(UNLOCK, wdt->base + WDTLOCK);
        writel(0, wdt->base + WDTCONTROL);
-       writel(0, wdt->base + WDTLOAD);
        writel(LOCK, wdt->base + WDTLOCK);
 
+       /* Flush posted writes. */
+       readl(wdt->base + WDTLOCK);
        spin_unlock(&wdt->lock);
 }
 
diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c
new file mode 100644 (file)
index 0000000..cfa1a15
--- /dev/null
@@ -0,0 +1,111 @@
+/*
+ *     watchdog_core.c
+ *
+ *     (c) Copyright 2008-2011 Alan Cox <alan@lxorguk.ukuu.org.uk>,
+ *                                             All Rights Reserved.
+ *
+ *     (c) Copyright 2008-2011 Wim Van Sebroeck <wim@iguana.be>.
+ *
+ *     This source code is part of the generic code that can be used
+ *     by all the watchdog timer drivers.
+ *
+ *     Based on source code of the following authors:
+ *       Matt Domsch <Matt_Domsch@dell.com>,
+ *       Rob Radez <rob@osinvestor.com>,
+ *       Rusty Lynch <rusty@linux.co.intel.com>
+ *       Satyam Sharma <satyam@infradead.org>
+ *       Randy Dunlap <randy.dunlap@oracle.com>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ *
+ *     Neither Alan Cox, CymruNet Ltd., Wim Van Sebroeck nor Iguana vzw.
+ *     admit liability nor provide warranty for any of this software.
+ *     This material is provided "AS-IS" and at no charge.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>      /* For EXPORT_SYMBOL/module stuff/... */
+#include <linux/types.h>       /* For standard types */
+#include <linux/errno.h>       /* For the -ENODEV/... values */
+#include <linux/kernel.h>      /* For printk/panic/... */
+#include <linux/watchdog.h>    /* For watchdog specific items */
+#include <linux/init.h>                /* For __init/__exit/... */
+
+#include "watchdog_dev.h"      /* For watchdog_dev_register/... */
+
+/**
+ * watchdog_register_device() - register a watchdog device
+ * @wdd: watchdog device
+ *
+ * Register a watchdog device with the kernel so that the
+ * watchdog timer can be accessed from userspace.
+ *
+ * A zero is returned on success and a negative errno code for
+ * failure.
+ */
+int watchdog_register_device(struct watchdog_device *wdd)
+{
+       int ret;
+
+       /* Reject devices that lack the mandatory info/ops tables. */
+       if (wdd == NULL || wdd->info == NULL || wdd->ops == NULL)
+               return -EINVAL;
+
+       /* Mandatory operations need to be supported */
+       if (wdd->ops->start == NULL || wdd->ops->stop == NULL)
+               return -EINVAL;
+
+       /*
+        * Check that we have valid min and max timeout values, if
+        * not reset them both to 0 (=not used or unknown)
+        */
+       if (wdd->min_timeout > wdd->max_timeout) {
+               pr_info("Invalid min and max timeout values, resetting to 0!\n");
+               wdd->min_timeout = 0;
+               wdd->max_timeout = 0;
+       }
+
+       /*
+        * Note: now that all watchdog_device data has been verified, we
+        * will not check this anymore in other functions. If data gets
+        * corrupted in a later stage then we expect a kernel panic!
+        */
+
+       /* We only support 1 watchdog device via the /dev/watchdog interface */
+       ret = watchdog_dev_register(wdd);
+       if (ret) {
+               pr_err("error registering /dev/watchdog (err=%d).\n", ret);
+               return ret;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(watchdog_register_device);
+
+/**
+ * watchdog_unregister_device() - unregister a watchdog device
+ * @wdd: watchdog device to unregister
+ *
+ * Unregister a watchdog device that was previously successfully
+ * registered with watchdog_register_device().
+ */
+void watchdog_unregister_device(struct watchdog_device *wdd)
+{
+       int ret;
+
+       /* tolerate a NULL device: nothing to do */
+       if (wdd == NULL)
+               return;
+
+       /* a failure here can only be logged; this function returns void */
+       ret = watchdog_dev_unregister(wdd);
+       if (ret)
+               pr_err("error unregistering /dev/watchdog (err=%d).\n", ret);
+}
+EXPORT_SYMBOL_GPL(watchdog_unregister_device);
+
+MODULE_AUTHOR("Alan Cox <alan@lxorguk.ukuu.org.uk>");
+MODULE_AUTHOR("Wim Van Sebroeck <wim@iguana.be>");
+MODULE_DESCRIPTION("WatchDog Timer Driver Core");
+MODULE_LICENSE("GPL");
diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
new file mode 100644 (file)
index 0000000..d33520d
--- /dev/null
@@ -0,0 +1,395 @@
+/*
+ *     watchdog_dev.c
+ *
+ *     (c) Copyright 2008-2011 Alan Cox <alan@lxorguk.ukuu.org.uk>,
+ *                                             All Rights Reserved.
+ *
+ *     (c) Copyright 2008-2011 Wim Van Sebroeck <wim@iguana.be>.
+ *
+ *
+ *     This source code is part of the generic code that can be used
+ *     by all the watchdog timer drivers.
+ *
+ *     This part of the generic code takes care of the following
+ *     misc device: /dev/watchdog.
+ *
+ *     Based on source code of the following authors:
+ *       Matt Domsch <Matt_Domsch@dell.com>,
+ *       Rob Radez <rob@osinvestor.com>,
+ *       Rusty Lynch <rusty@linux.co.intel.com>
+ *       Satyam Sharma <satyam@infradead.org>
+ *       Randy Dunlap <randy.dunlap@oracle.com>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ *
+ *     Neither Alan Cox, CymruNet Ltd., Wim Van Sebroeck nor Iguana vzw.
+ *     admit liability nor provide warranty for any of this software.
+ *     This material is provided "AS-IS" and at no charge.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>      /* For module stuff/... */
+#include <linux/types.h>       /* For standard types (like size_t) */
+#include <linux/errno.h>       /* For the -ENODEV/... values */
+#include <linux/kernel.h>      /* For printk/panic/... */
+#include <linux/fs.h>          /* For file operations */
+#include <linux/watchdog.h>    /* For watchdog specific items */
+#include <linux/miscdevice.h>  /* For handling misc devices */
+#include <linux/init.h>                /* For __init/__exit/... */
+#include <linux/uaccess.h>     /* For copy_to_user/put_user/... */
+
+/* claimed via test_and_set_bit(0, ...): only one /dev/watchdog at a time */
+static unsigned long watchdog_dev_busy;
+/* the single watchdog device currently behind /dev/watchdog */
+static struct watchdog_device *wdd;
+
+/*
+ *     watchdog_ping: ping the watchdog.
+ *     @wddev: the watchdog device to ping
+ *
+ *     If the watchdog has no own ping operation then it needs to be
+ *     restarted via the start operation. This wrapper function does
+ *     exactly that.
+ *     We only ping when the watchdog device is running.
+ */
+
+static int watchdog_ping(struct watchdog_device *wddev)
+{
+       /* test the device that was passed in, not the global wdd */
+       if (test_bit(WDOG_ACTIVE, &wddev->status)) {
+               if (wddev->ops->ping)
+                       return wddev->ops->ping(wddev);  /* ping the watchdog */
+               else
+                       return wddev->ops->start(wddev); /* restart watchdog */
+       }
+       return 0;
+}
+
+/*
+ *     watchdog_start: wrapper to start the watchdog.
+ *     @wddev: the watchdog device to start
+ *
+ *     Start the watchdog if it is not active and mark it active.
+ *     This function returns zero on success or a negative errno code for
+ *     failure.
+ */
+
+static int watchdog_start(struct watchdog_device *wddev)
+{
+       int err;
+
+       /* operate on the device passed in, not on the global wdd */
+       if (!test_bit(WDOG_ACTIVE, &wddev->status)) {
+               err = wddev->ops->start(wddev);
+               if (err < 0)
+                       return err;
+
+               set_bit(WDOG_ACTIVE, &wddev->status);
+       }
+       return 0;
+}
+
+/*
+ *     watchdog_stop: wrapper to stop the watchdog.
+ *     @wddev: the watchdog device to stop
+ *
+ *     Stop the watchdog if it is still active and unmark it active.
+ *     This function returns zero on success or a negative errno code for
+ *     failure.
+ *     If the 'nowayout' feature was set, the watchdog cannot be stopped.
+ */
+
+static int watchdog_stop(struct watchdog_device *wddev)
+{
+       int err = -EBUSY;
+
+       /* honour the nowayout flag of the device passed in, not of wdd */
+       if (test_bit(WDOG_NO_WAY_OUT, &wddev->status)) {
+               pr_info("%s: nowayout prevents watchdog to be stopped!\n",
+                                                       wddev->info->identity);
+               return err;
+       }
+
+       if (test_bit(WDOG_ACTIVE, &wddev->status)) {
+               err = wddev->ops->stop(wddev);
+               if (err < 0)
+                       return err;
+
+               clear_bit(WDOG_ACTIVE, &wddev->status);
+       }
+       return 0;
+}
+
+/*
+ *     watchdog_write: writes to the watchdog.
+ *     @file: file from VFS
+ *     @data: user address of data
+ *     @len: length of data
+ *     @ppos: pointer to the file offset
+ *
+ *     A write to a watchdog device is defined as a keepalive ping.
+ *     Writing the magic 'V' sequence allows the next close to turn
+ *     off the watchdog (if 'nowayout' is not set).
+ */
+
+static ssize_t watchdog_write(struct file *file, const char __user *data,
+                                               size_t len, loff_t *ppos)
+{
+       size_t i;
+       char c;
+
+       if (len == 0)
+               return 0;
+
+       /*
+        * Note: just in case someone wrote the magic character
+        * five months ago...
+        */
+       clear_bit(WDOG_ALLOW_RELEASE, &wdd->status);
+
+       /* scan to see whether or not we got the magic character */
+       for (i = 0; i != len; i++) {
+               if (get_user(c, data + i))
+                       return -EFAULT;
+               if (c == 'V')
+                       set_bit(WDOG_ALLOW_RELEASE, &wdd->status);
+       }
+
+       /* someone wrote to us, so we send the watchdog a keepalive ping */
+       /* NOTE(review): a ping failure is not reported back to the writer */
+       watchdog_ping(wdd);
+
+       return len;
+}
+
+/*
+ *     watchdog_ioctl: handle the different ioctl's for the watchdog device.
+ *     @file: file handle to the device
+ *     @cmd: watchdog command
+ *     @arg: argument pointer
+ *
+ *     The watchdog API defines a common set of functions for all watchdogs
+ *     according to their available features.
+ */
+
+static long watchdog_ioctl(struct file *file, unsigned int cmd,
+                                                       unsigned long arg)
+{
+       void __user *argp = (void __user *)arg;
+       int __user *p = argp;
+       unsigned int val;
+       int err;
+
+       /* give the driver a first shot; -ENOIOCTLCMD means "not handled,
+        * fall back to the generic implementation below" */
+       if (wdd->ops->ioctl) {
+               err = wdd->ops->ioctl(wdd, cmd, arg);
+               if (err != -ENOIOCTLCMD)
+                       return err;
+       }
+
+       switch (cmd) {
+       case WDIOC_GETSUPPORT:
+               return copy_to_user(argp, wdd->info,
+                       sizeof(struct watchdog_info)) ? -EFAULT : 0;
+       case WDIOC_GETSTATUS:
+               /* the status operation is optional; report 0 when absent */
+               val = wdd->ops->status ? wdd->ops->status(wdd) : 0;
+               return put_user(val, p);
+       case WDIOC_GETBOOTSTATUS:
+               return put_user(wdd->bootstatus, p);
+       case WDIOC_SETOPTIONS:
+               if (get_user(val, p))
+                       return -EFAULT;
+               if (val & WDIOS_DISABLECARD) {
+                       err = watchdog_stop(wdd);
+                       if (err < 0)
+                               return err;
+               }
+               if (val & WDIOS_ENABLECARD) {
+                       err = watchdog_start(wdd);
+                       if (err < 0)
+                               return err;
+               }
+               return 0;
+       case WDIOC_KEEPALIVE:
+               if (!(wdd->info->options & WDIOF_KEEPALIVEPING))
+                       return -EOPNOTSUPP;
+               watchdog_ping(wdd);
+               return 0;
+       case WDIOC_SETTIMEOUT:
+               if ((wdd->ops->set_timeout == NULL) ||
+                   !(wdd->info->options & WDIOF_SETTIMEOUT))
+                       return -EOPNOTSUPP;
+               if (get_user(val, p))
+                       return -EFAULT;
+               /* max_timeout == 0 means the driver declared no limits */
+               if ((wdd->max_timeout != 0) &&
+                   (val < wdd->min_timeout || val > wdd->max_timeout))
+                               return -EINVAL;
+               err = wdd->ops->set_timeout(wdd, val);
+               if (err < 0)
+                       return err;
+               wdd->timeout = val;
+               /* If the watchdog is active then we send a keepalive ping
+                * to make sure that the watchdog keep's running (and if
+                * possible that it takes the new timeout) */
+               watchdog_ping(wdd);
+               /* fall through to WDIOC_GETTIMEOUT: report the timeout back */
+       case WDIOC_GETTIMEOUT:
+               /* timeout == 0 means that we don't know the timeout */
+               if (wdd->timeout == 0)
+                       return -EOPNOTSUPP;
+               return put_user(wdd->timeout, p);
+       default:
+               return -ENOTTY;
+       }
+}
+
+/*
+ *     watchdog_open: open the /dev/watchdog device.
+ *     @inode: inode of device
+ *     @file: file handle to device
+ *
+ *     When the /dev/watchdog device gets opened, we start the watchdog.
+ *     Watch out: the /dev/watchdog device is single open, so we make sure
+ *     it can only be opened once.
+ */
+
+static int watchdog_open(struct inode *inode, struct file *file)
+{
+       int err = -EBUSY;
+
+       /* the watchdog is single open! */
+       if (test_and_set_bit(WDOG_DEV_OPEN, &wdd->status))
+               return -EBUSY;
+
+       /*
+        * If the /dev/watchdog device is open, we don't want the module
+        * to be unloaded.
+        */
+       if (!try_module_get(wdd->ops->owner))
+               goto out;
+
+       err = watchdog_start(wdd);
+       if (err < 0)
+               goto out_mod;
+
+       /* dev/watchdog is a virtual (and thus non-seekable) filesystem */
+       return nonseekable_open(inode, file);
+
+out_mod:
+       /* start failed: drop the module reference taken above */
+       module_put(wdd->ops->owner);
+out:
+       /* allow a later open attempt to try again */
+       clear_bit(WDOG_DEV_OPEN, &wdd->status);
+       return err;
+}
+
+/*
+ *      watchdog_release: release the /dev/watchdog device.
+ *      @inode: inode of device
+ *      @file: file handle to device
+ *
+ *     This is the code for when /dev/watchdog gets closed. We will only
+ *     stop the watchdog when we have received the magic char (and nowayout
+ *     was not set), else the watchdog will keep running.
+ */
+
+static int watchdog_release(struct inode *inode, struct file *file)
+{
+       /* stays -EBUSY when we do not even attempt to stop the watchdog */
+       int err = -EBUSY;
+
+       /*
+        * We only stop the watchdog if we received the magic character
+        * or if WDIOF_MAGICCLOSE is not set. If nowayout was set then
+        * watchdog_stop will fail.
+        */
+       if (test_and_clear_bit(WDOG_ALLOW_RELEASE, &wdd->status) ||
+           !(wdd->info->options & WDIOF_MAGICCLOSE))
+               err = watchdog_stop(wdd);
+
+       /* If the watchdog was not stopped, send a keepalive ping */
+       if (err < 0) {
+               pr_crit("%s: watchdog did not stop!\n", wdd->info->identity);
+               watchdog_ping(wdd);
+       }
+
+       /* Allow the owner module to be unloaded again */
+       module_put(wdd->ops->owner);
+
+       /* make sure that /dev/watchdog can be re-opened */
+       clear_bit(WDOG_DEV_OPEN, &wdd->status);
+
+       return 0;
+}
+
+/* file operations for /dev/watchdog (no read op: writes are keepalives) */
+static const struct file_operations watchdog_fops = {
+       .owner          = THIS_MODULE,
+       .write          = watchdog_write,
+       .unlocked_ioctl = watchdog_ioctl,
+       .open           = watchdog_open,
+       .release        = watchdog_release,
+};
+
+/* the misc device node that shows up as /dev/watchdog */
+static struct miscdevice watchdog_miscdev = {
+       .minor          = WATCHDOG_MINOR,
+       .name           = "watchdog",
+       .fops           = &watchdog_fops,
+};
+
+/*
+ *     watchdog_dev_register:
+ *     @watchdog: watchdog device
+ *
+ *     Register a watchdog device as /dev/watchdog. /dev/watchdog
+ *     is actually a miscdevice and thus we set it up like that.
+ */
+
+int watchdog_dev_register(struct watchdog_device *watchdog)
+{
+       int err;
+
+       /* Only one device can register for /dev/watchdog */
+       if (test_and_set_bit(0, &watchdog_dev_busy)) {
+               pr_err("only one watchdog can use /dev/watchdog.\n");
+               return -EBUSY;
+       }
+
+       /* publish the device before misc_register makes the node reachable */
+       wdd = watchdog;
+
+       err = misc_register(&watchdog_miscdev);
+       if (err != 0) {
+               pr_err("%s: cannot register miscdev on minor=%d (err=%d).\n",
+                       watchdog->info->identity, WATCHDOG_MINOR, err);
+               goto out;
+       }
+
+       return 0;
+
+out:
+       /* registration failed: release the device slot and the busy flag */
+       wdd = NULL;
+       clear_bit(0, &watchdog_dev_busy);
+       return err;
+}
+
+/*
+ *     watchdog_dev_unregister:
+ *     @watchdog: watchdog device
+ *
+ *     Deregister the /dev/watchdog device.
+ */
+
+int watchdog_dev_unregister(struct watchdog_device *watchdog)
+{
+       /* Check that a watchdog device was registered in the past */
+       if (!test_bit(0, &watchdog_dev_busy) || !wdd)
+               return -ENODEV;
+
+       /* We can only unregister the watchdog device that was registered */
+       if (watchdog != wdd) {
+               pr_err("%s: watchdog was not registered as /dev/watchdog.\n",
+                       watchdog->info->identity);
+               return -ENODEV;
+       }
+
+       /* tear down in reverse order of watchdog_dev_register */
+       misc_deregister(&watchdog_miscdev);
+       wdd = NULL;
+       clear_bit(0, &watchdog_dev_busy);
+       return 0;
+}
diff --git a/drivers/watchdog/watchdog_dev.h b/drivers/watchdog/watchdog_dev.h
new file mode 100644 (file)
index 0000000..bc7612b
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ *     watchdog_dev.h
+ *
+ *     (c) Copyright 2008-2011 Alan Cox <alan@lxorguk.ukuu.org.uk>,
+ *                                             All Rights Reserved.
+ *
+ *     (c) Copyright 2008-2011 Wim Van Sebroeck <wim@iguana.be>.
+ *
+ *     This source code is part of the generic code that can be used
+ *     by all the watchdog timer drivers.
+ *
+ *     Based on source code of the following authors:
+ *       Matt Domsch <Matt_Domsch@dell.com>,
+ *       Rob Radez <rob@osinvestor.com>,
+ *       Rusty Lynch <rusty@linux.co.intel.com>
+ *       Satyam Sharma <satyam@infradead.org>
+ *       Randy Dunlap <randy.dunlap@oracle.com>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ *
+ *     Neither Alan Cox, CymruNet Ltd., Wim Van Sebroeck nor Iguana vzw.
+ *     admit liability nor provide warranty for any of this software.
+ *     This material is provided "AS-IS" and at no charge.
+ */
+
+/*
+ *     Functions/procedures to be called by the core
+ */
+int watchdog_dev_register(struct watchdog_device *);
+int watchdog_dev_unregister(struct watchdog_device *);
index 4d433d3..f11e43e 100644 (file)
@@ -187,7 +187,7 @@ EXPORT_SYMBOL_GPL(anon_inode_getfd);
  */
 static struct inode *anon_inode_mkinode(void)
 {
-       struct inode *inode = new_inode(anon_inode_mnt->mnt_sb);
+       struct inode *inode = new_inode_pseudo(anon_inode_mnt->mnt_sb);
 
        if (!inode)
                return ERR_PTR(-ENOMEM);
index 52d7eca..502b9e9 100644 (file)
@@ -34,6 +34,9 @@ struct btrfs_inode {
         */
        struct btrfs_key location;
 
+       /* Lock for counters */
+       spinlock_t lock;
+
        /* the extent_tree has caches of all the extent mappings to disk */
        struct extent_map_tree extent_tree;
 
@@ -134,8 +137,8 @@ struct btrfs_inode {
         * items we think we'll end up using, and reserved_extents is the number
         * of extent items we've reserved metadata for.
         */
-       atomic_t outstanding_extents;
-       atomic_t reserved_extents;
+       unsigned outstanding_extents;
+       unsigned reserved_extents;
 
        /*
         * ordered_data_close is set by truncate when a file that used
@@ -184,4 +187,13 @@ static inline void btrfs_i_size_write(struct inode *inode, u64 size)
        BTRFS_I(inode)->disk_i_size = size;
 }
 
+static inline bool btrfs_is_free_space_inode(struct btrfs_root *root,
+                                      struct inode *inode)
+{
+       if (root == root->fs_info->tree_root ||
+           BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
+               return true;
+       return false;
+}
+
 #endif
index 2e66786..011cab3 100644 (file)
@@ -54,8 +54,13 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
 {
        int i;
        for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
-               if (p->nodes[i] && p->locks[i])
-                       btrfs_set_lock_blocking(p->nodes[i]);
+               if (!p->nodes[i] || !p->locks[i])
+                       continue;
+               btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]);
+               if (p->locks[i] == BTRFS_READ_LOCK)
+                       p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
+               else if (p->locks[i] == BTRFS_WRITE_LOCK)
+                       p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
        }
 }
 
@@ -68,7 +73,7 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
  * for held
  */
 noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
-                                       struct extent_buffer *held)
+                                       struct extent_buffer *held, int held_rw)
 {
        int i;
 
@@ -79,19 +84,29 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
         * really sure by forcing the path to blocking before we clear
         * the path blocking.
         */
-       if (held)
-               btrfs_set_lock_blocking(held);
+       if (held) {
+               btrfs_set_lock_blocking_rw(held, held_rw);
+               if (held_rw == BTRFS_WRITE_LOCK)
+                       held_rw = BTRFS_WRITE_LOCK_BLOCKING;
+               else if (held_rw == BTRFS_READ_LOCK)
+                       held_rw = BTRFS_READ_LOCK_BLOCKING;
+       }
        btrfs_set_path_blocking(p);
 #endif
 
        for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
-               if (p->nodes[i] && p->locks[i])
-                       btrfs_clear_lock_blocking(p->nodes[i]);
+               if (p->nodes[i] && p->locks[i]) {
+                       btrfs_clear_lock_blocking_rw(p->nodes[i], p->locks[i]);
+                       if (p->locks[i] == BTRFS_WRITE_LOCK_BLOCKING)
+                               p->locks[i] = BTRFS_WRITE_LOCK;
+                       else if (p->locks[i] == BTRFS_READ_LOCK_BLOCKING)
+                               p->locks[i] = BTRFS_READ_LOCK;
+               }
        }
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
        if (held)
-               btrfs_clear_lock_blocking(held);
+               btrfs_clear_lock_blocking_rw(held, held_rw);
 #endif
 }
 
@@ -119,7 +134,7 @@ noinline void btrfs_release_path(struct btrfs_path *p)
                if (!p->nodes[i])
                        continue;
                if (p->locks[i]) {
-                       btrfs_tree_unlock(p->nodes[i]);
+                       btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
                        p->locks[i] = 0;
                }
                free_extent_buffer(p->nodes[i]);
@@ -167,6 +182,25 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
        return eb;
 }
 
+/* loop around taking references on and locking the root node of the
+ * tree until you end up with a lock on the root.  A locked buffer
+ * is returned, with a reference held.
+ */
+struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
+{
+       struct extent_buffer *eb;
+
+       while (1) {
+               eb = btrfs_root_node(root);
+               btrfs_tree_read_lock(eb);
+               if (eb == root->node)
+                       break;
+               btrfs_tree_read_unlock(eb);
+               free_extent_buffer(eb);
+       }
+       return eb;
+}
+
 /* cowonly root (everything not a reference counted cow subvolume), just get
  * put onto a simple dirty list.  transaction.c walks this to make sure they
  * get properly updated on disk.
@@ -626,14 +660,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
        for (i = start_slot; i < end_slot; i++) {
                int close = 1;
 
-               if (!parent->map_token) {
-                       map_extent_buffer(parent,
-                                       btrfs_node_key_ptr_offset(i),
-                                       sizeof(struct btrfs_key_ptr),
-                                       &parent->map_token, &parent->kaddr,
-                                       &parent->map_start, &parent->map_len,
-                                       KM_USER1);
-               }
                btrfs_node_key(parent, &disk_key, i);
                if (!progress_passed && comp_keys(&disk_key, progress) < 0)
                        continue;
@@ -656,11 +682,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
                        last_block = blocknr;
                        continue;
                }
-               if (parent->map_token) {
-                       unmap_extent_buffer(parent, parent->map_token,
-                                           KM_USER1);
-                       parent->map_token = NULL;
-               }
 
                cur = btrfs_find_tree_block(root, blocknr, blocksize);
                if (cur)
@@ -701,11 +722,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
                btrfs_tree_unlock(cur);
                free_extent_buffer(cur);
        }
-       if (parent->map_token) {
-               unmap_extent_buffer(parent, parent->map_token,
-                                   KM_USER1);
-               parent->map_token = NULL;
-       }
        return err;
 }
 
@@ -746,7 +762,6 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
        struct btrfs_disk_key *tmp = NULL;
        struct btrfs_disk_key unaligned;
        unsigned long offset;
-       char *map_token = NULL;
        char *kaddr = NULL;
        unsigned long map_start = 0;
        unsigned long map_len = 0;
@@ -756,18 +771,13 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
                mid = (low + high) / 2;
                offset = p + mid * item_size;
 
-               if (!map_token || offset < map_start ||
+               if (!kaddr || offset < map_start ||
                    (offset + sizeof(struct btrfs_disk_key)) >
                    map_start + map_len) {
-                       if (map_token) {
-                               unmap_extent_buffer(eb, map_token, KM_USER0);
-                               map_token = NULL;
-                       }
 
                        err = map_private_extent_buffer(eb, offset,
                                                sizeof(struct btrfs_disk_key),
-                                               &map_token, &kaddr,
-                                               &map_start, &map_len, KM_USER0);
+                                               &kaddr, &map_start, &map_len);
 
                        if (!err) {
                                tmp = (struct btrfs_disk_key *)(kaddr + offset -
@@ -790,14 +800,10 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
                        high = mid;
                else {
                        *slot = mid;
-                       if (map_token)
-                               unmap_extent_buffer(eb, map_token, KM_USER0);
                        return 0;
                }
        }
        *slot = low;
-       if (map_token)
-               unmap_extent_buffer(eb, map_token, KM_USER0);
        return 1;
 }
 
@@ -890,7 +896,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 
        mid = path->nodes[level];
 
-       WARN_ON(!path->locks[level]);
+       WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK &&
+               path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING);
        WARN_ON(btrfs_header_generation(mid) != trans->transid);
 
        orig_ptr = btrfs_node_blockptr(mid, orig_slot);
@@ -1228,7 +1235,6 @@ static void reada_for_search(struct btrfs_root *root,
        u32 nr;
        u32 blocksize;
        u32 nscan = 0;
-       bool map = true;
 
        if (level != 1)
                return;
@@ -1250,19 +1256,8 @@ static void reada_for_search(struct btrfs_root *root,
 
        nritems = btrfs_header_nritems(node);
        nr = slot;
-       if (node->map_token || path->skip_locking)
-               map = false;
 
        while (1) {
-               if (map && !node->map_token) {
-                       unsigned long offset = btrfs_node_key_ptr_offset(nr);
-                       map_private_extent_buffer(node, offset,
-                                                 sizeof(struct btrfs_key_ptr),
-                                                 &node->map_token,
-                                                 &node->kaddr,
-                                                 &node->map_start,
-                                                 &node->map_len, KM_USER1);
-               }
                if (direction < 0) {
                        if (nr == 0)
                                break;
@@ -1281,11 +1276,6 @@ static void reada_for_search(struct btrfs_root *root,
                if ((search <= target && target - search <= 65536) ||
                    (search > target && search - target <= 65536)) {
                        gen = btrfs_node_ptr_generation(node, nr);
-                       if (map && node->map_token) {
-                               unmap_extent_buffer(node, node->map_token,
-                                                   KM_USER1);
-                               node->map_token = NULL;
-                       }
                        readahead_tree_block(root, search, blocksize, gen);
                        nread += blocksize;
                }
@@ -1293,10 +1283,6 @@ static void reada_for_search(struct btrfs_root *root,
                if ((nread > 65536 || nscan > 32))
                        break;
        }
-       if (map && node->map_token) {
-               unmap_extent_buffer(node, node->map_token, KM_USER1);
-               node->map_token = NULL;
-       }
 }
 
 /*
@@ -1409,7 +1395,7 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
 
                t = path->nodes[i];
                if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
-                       btrfs_tree_unlock(t);
+                       btrfs_tree_unlock_rw(t, path->locks[i]);
                        path->locks[i] = 0;
                }
        }
@@ -1436,7 +1422,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
                        continue;
                if (!path->locks[i])
                        continue;
-               btrfs_tree_unlock(path->nodes[i]);
+               btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
                path->locks[i] = 0;
        }
 }
@@ -1485,6 +1471,8 @@ read_block_for_search(struct btrfs_trans_handle *trans,
                         * we can trust our generation number
                         */
                        free_extent_buffer(tmp);
+                       btrfs_set_path_blocking(p);
+
                        tmp = read_tree_block(root, blocknr, blocksize, gen);
                        if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
                                *eb_ret = tmp;
@@ -1540,20 +1528,27 @@ read_block_for_search(struct btrfs_trans_handle *trans,
 static int
 setup_nodes_for_search(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root, struct btrfs_path *p,
-                      struct extent_buffer *b, int level, int ins_len)
+                      struct extent_buffer *b, int level, int ins_len,
+                      int *write_lock_level)
 {
        int ret;
        if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
            BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
                int sret;
 
+               if (*write_lock_level < level + 1) {
+                       *write_lock_level = level + 1;
+                       btrfs_release_path(p);
+                       goto again;
+               }
+
                sret = reada_for_balance(root, p, level);
                if (sret)
                        goto again;
 
                btrfs_set_path_blocking(p);
                sret = split_node(trans, root, p, level);
-               btrfs_clear_path_blocking(p, NULL);
+               btrfs_clear_path_blocking(p, NULL, 0);
 
                BUG_ON(sret > 0);
                if (sret) {
@@ -1565,13 +1560,19 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
                   BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {
                int sret;
 
+               if (*write_lock_level < level + 1) {
+                       *write_lock_level = level + 1;
+                       btrfs_release_path(p);
+                       goto again;
+               }
+
                sret = reada_for_balance(root, p, level);
                if (sret)
                        goto again;
 
                btrfs_set_path_blocking(p);
                sret = balance_level(trans, root, p, level);
-               btrfs_clear_path_blocking(p, NULL);
+               btrfs_clear_path_blocking(p, NULL, 0);
 
                if (sret) {
                        ret = sret;
@@ -1615,27 +1616,78 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
        int err;
        int level;
        int lowest_unlock = 1;
+       int root_lock;
+       /* everything at write_lock_level or lower must be write locked */
+       int write_lock_level = 0;
        u8 lowest_level = 0;
 
        lowest_level = p->lowest_level;
        WARN_ON(lowest_level && ins_len > 0);
        WARN_ON(p->nodes[0] != NULL);
 
-       if (ins_len < 0)
+       if (ins_len < 0) {
                lowest_unlock = 2;
 
+               /* when we are removing items, we might have to go up to level
+                * two as we update tree pointers  Make sure we keep write
+                * for those levels as well
+                */
+               write_lock_level = 2;
+       } else if (ins_len > 0) {
+               /*
+                * for inserting items, make sure we have a write lock on
+                * level 1 so we can update keys
+                */
+               write_lock_level = 1;
+       }
+
+       if (!cow)
+               write_lock_level = -1;
+
+       if (cow && (p->keep_locks || p->lowest_level))
+               write_lock_level = BTRFS_MAX_LEVEL;
+
 again:
+       /*
+        * we try very hard to do read locks on the root
+        */
+       root_lock = BTRFS_READ_LOCK;
+       level = 0;
        if (p->search_commit_root) {
+               /*
+                * the commit roots are read only
+                * so we always do read locks
+                */
                b = root->commit_root;
                extent_buffer_get(b);
+               level = btrfs_header_level(b);
                if (!p->skip_locking)
-                       btrfs_tree_lock(b);
+                       btrfs_tree_read_lock(b);
        } else {
-               if (p->skip_locking)
+               if (p->skip_locking) {
                        b = btrfs_root_node(root);
-               else
-                       b = btrfs_lock_root_node(root);
+                       level = btrfs_header_level(b);
+               } else {
+                       /* we don't know the level of the root node
+                        * until we actually have it read locked
+                        */
+                       b = btrfs_read_lock_root_node(root);
+                       level = btrfs_header_level(b);
+                       if (level <= write_lock_level) {
+                               /* whoops, must trade for write lock */
+                               btrfs_tree_read_unlock(b);
+                               free_extent_buffer(b);
+                               b = btrfs_lock_root_node(root);
+                               root_lock = BTRFS_WRITE_LOCK;
+
+                               /* the level might have changed, check again */
+                               level = btrfs_header_level(b);
+                       }
+               }
        }
+       p->nodes[level] = b;
+       if (!p->skip_locking)
+               p->locks[level] = root_lock;
 
        while (b) {
                level = btrfs_header_level(b);
@@ -1644,10 +1696,6 @@ again:
                 * setup the path here so we can release it under lock
                 * contention with the cow code
                 */
-               p->nodes[level] = b;
-               if (!p->skip_locking)
-                       p->locks[level] = 1;
-
                if (cow) {
                        /*
                         * if we don't really need to cow this block
@@ -1659,6 +1707,16 @@ again:
 
                        btrfs_set_path_blocking(p);
 
+                       /*
+                        * must have write locks on this node and the
+                        * parent
+                        */
+                       if (level + 1 > write_lock_level) {
+                               write_lock_level = level + 1;
+                               btrfs_release_path(p);
+                               goto again;
+                       }
+
                        err = btrfs_cow_block(trans, root, b,
                                              p->nodes[level + 1],
                                              p->slots[level + 1], &b);
@@ -1671,10 +1729,7 @@ cow_done:
                BUG_ON(!cow && ins_len);
 
                p->nodes[level] = b;
-               if (!p->skip_locking)
-                       p->locks[level] = 1;
-
-               btrfs_clear_path_blocking(p, NULL);
+               btrfs_clear_path_blocking(p, NULL, 0);
 
                /*
                 * we have a lock on b and as long as we aren't changing
@@ -1700,7 +1755,7 @@ cow_done:
                        }
                        p->slots[level] = slot;
                        err = setup_nodes_for_search(trans, root, p, b, level,
-                                                    ins_len);
+                                            ins_len, &write_lock_level);
                        if (err == -EAGAIN)
                                goto again;
                        if (err) {
@@ -1710,6 +1765,19 @@ cow_done:
                        b = p->nodes[level];
                        slot = p->slots[level];
 
+                       /*
+                        * slot 0 is special, if we change the key
+                        * we have to update the parent pointer
+                        * which means we must have a write lock
+                        * on the parent
+                        */
+                       if (slot == 0 && cow &&
+                           write_lock_level < level + 1) {
+                               write_lock_level = level + 1;
+                               btrfs_release_path(p);
+                               goto again;
+                       }
+
                        unlock_up(p, level, lowest_unlock);
 
                        if (level == lowest_level) {
@@ -1728,23 +1796,42 @@ cow_done:
                        }
 
                        if (!p->skip_locking) {
-                               btrfs_clear_path_blocking(p, NULL);
-                               err = btrfs_try_spin_lock(b);
-
-                               if (!err) {
-                                       btrfs_set_path_blocking(p);
-                                       btrfs_tree_lock(b);
-                                       btrfs_clear_path_blocking(p, b);
+                               level = btrfs_header_level(b);
+                               if (level <= write_lock_level) {
+                                       err = btrfs_try_tree_write_lock(b);
+                                       if (!err) {
+                                               btrfs_set_path_blocking(p);
+                                               btrfs_tree_lock(b);
+                                               btrfs_clear_path_blocking(p, b,
+                                                                 BTRFS_WRITE_LOCK);
+                                       }
+                                       p->locks[level] = BTRFS_WRITE_LOCK;
+                               } else {
+                                       err = btrfs_try_tree_read_lock(b);
+                                       if (!err) {
+                                               btrfs_set_path_blocking(p);
+                                               btrfs_tree_read_lock(b);
+                                               btrfs_clear_path_blocking(p, b,
+                                                                 BTRFS_READ_LOCK);
+                                       }
+                                       p->locks[level] = BTRFS_READ_LOCK;
                                }
+                               p->nodes[level] = b;
                        }
                } else {
                        p->slots[level] = slot;
                        if (ins_len > 0 &&
                            btrfs_leaf_free_space(root, b) < ins_len) {
+                               if (write_lock_level < 1) {
+                                       write_lock_level = 1;
+                                       btrfs_release_path(p);
+                                       goto again;
+                               }
+
                                btrfs_set_path_blocking(p);
                                err = split_leaf(trans, root, key,
                                                 p, ins_len, ret == 0);
-                               btrfs_clear_path_blocking(p, NULL);
+                               btrfs_clear_path_blocking(p, NULL, 0);
 
                                BUG_ON(err > 0);
                                if (err) {
@@ -2025,7 +2112,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
        add_root_to_dirty_list(root);
        extent_buffer_get(c);
        path->nodes[level] = c;
-       path->locks[level] = 1;
+       path->locks[level] = BTRFS_WRITE_LOCK;
        path->slots[level] = 0;
        return 0;
 }
@@ -2253,14 +2340,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
                if (path->slots[0] == i)
                        push_space += data_size;
 
-               if (!left->map_token) {
-                       map_extent_buffer(left, (unsigned long)item,
-                                       sizeof(struct btrfs_item),
-                                       &left->map_token, &left->kaddr,
-                                       &left->map_start, &left->map_len,
-                                       KM_USER1);
-               }
-
                this_item_size = btrfs_item_size(left, item);
                if (this_item_size + sizeof(*item) + push_space > free_space)
                        break;
@@ -2271,10 +2350,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
                        break;
                i--;
        }
-       if (left->map_token) {
-               unmap_extent_buffer(left, left->map_token, KM_USER1);
-               left->map_token = NULL;
-       }
 
        if (push_items == 0)
                goto out_unlock;
@@ -2316,21 +2391,10 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
        push_space = BTRFS_LEAF_DATA_SIZE(root);
        for (i = 0; i < right_nritems; i++) {
                item = btrfs_item_nr(right, i);
-               if (!right->map_token) {
-                       map_extent_buffer(right, (unsigned long)item,
-                                       sizeof(struct btrfs_item),
-                                       &right->map_token, &right->kaddr,
-                                       &right->map_start, &right->map_len,
-                                       KM_USER1);
-               }
                push_space -= btrfs_item_size(right, item);
                btrfs_set_item_offset(right, item, push_space);
        }
 
-       if (right->map_token) {
-               unmap_extent_buffer(right, right->map_token, KM_USER1);
-               right->map_token = NULL;
-       }
        left_nritems -= push_items;
        btrfs_set_header_nritems(left, left_nritems);
 
@@ -2467,13 +2531,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 
        for (i = 0; i < nr; i++) {
                item = btrfs_item_nr(right, i);
-               if (!right->map_token) {
-                       map_extent_buffer(right, (unsigned long)item,
-                                       sizeof(struct btrfs_item),
-                                       &right->map_token, &right->kaddr,
-                                       &right->map_start, &right->map_len,
-                                       KM_USER1);
-               }
 
                if (!empty && push_items > 0) {
                        if (path->slots[0] < i)
@@ -2496,11 +2553,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
                push_space += this_item_size + sizeof(*item);
        }
 
-       if (right->map_token) {
-               unmap_extent_buffer(right, right->map_token, KM_USER1);
-               right->map_token = NULL;
-       }
-
        if (push_items == 0) {
                ret = 1;
                goto out;
@@ -2530,23 +2582,12 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
                u32 ioff;
 
                item = btrfs_item_nr(left, i);
-               if (!left->map_token) {
-                       map_extent_buffer(left, (unsigned long)item,
-                                       sizeof(struct btrfs_item),
-                                       &left->map_token, &left->kaddr,
-                                       &left->map_start, &left->map_len,
-                                       KM_USER1);
-               }
 
                ioff = btrfs_item_offset(left, item);
                btrfs_set_item_offset(left, item,
                      ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
        }
        btrfs_set_header_nritems(left, old_left_nritems + push_items);
-       if (left->map_token) {
-               unmap_extent_buffer(left, left->map_token, KM_USER1);
-               left->map_token = NULL;
-       }
 
        /* fixup right node */
        if (push_items > right_nritems) {
@@ -2574,21 +2615,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
        for (i = 0; i < right_nritems; i++) {
                item = btrfs_item_nr(right, i);
 
-               if (!right->map_token) {
-                       map_extent_buffer(right, (unsigned long)item,
-                                       sizeof(struct btrfs_item),
-                                       &right->map_token, &right->kaddr,
-                                       &right->map_start, &right->map_len,
-                                       KM_USER1);
-               }
-
                push_space = push_space - btrfs_item_size(right, item);
                btrfs_set_item_offset(right, item, push_space);
        }
-       if (right->map_token) {
-               unmap_extent_buffer(right, right->map_token, KM_USER1);
-               right->map_token = NULL;
-       }
 
        btrfs_mark_buffer_dirty(left);
        if (right_nritems)
@@ -2729,23 +2758,10 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
                struct btrfs_item *item = btrfs_item_nr(right, i);
                u32 ioff;
 
-               if (!right->map_token) {
-                       map_extent_buffer(right, (unsigned long)item,
-                                       sizeof(struct btrfs_item),
-                                       &right->map_token, &right->kaddr,
-                                       &right->map_start, &right->map_len,
-                                       KM_USER1);
-               }
-
                ioff = btrfs_item_offset(right, item);
                btrfs_set_item_offset(right, item, ioff + rt_data_off);
        }
 
-       if (right->map_token) {
-               unmap_extent_buffer(right, right->map_token, KM_USER1);
-               right->map_token = NULL;
-       }
-
        btrfs_set_header_nritems(l, mid);
        ret = 0;
        btrfs_item_key(right, &disk_key, 0);
@@ -3264,23 +3280,10 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
                u32 ioff;
                item = btrfs_item_nr(leaf, i);
 
-               if (!leaf->map_token) {
-                       map_extent_buffer(leaf, (unsigned long)item,
-                                       sizeof(struct btrfs_item),
-                                       &leaf->map_token, &leaf->kaddr,
-                                       &leaf->map_start, &leaf->map_len,
-                                       KM_USER1);
-               }
-
                ioff = btrfs_item_offset(leaf, item);
                btrfs_set_item_offset(leaf, item, ioff + size_diff);
        }
 
-       if (leaf->map_token) {
-               unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-               leaf->map_token = NULL;
-       }
-
        /* shift the data */
        if (from_end) {
                memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
@@ -3377,22 +3380,10 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
                u32 ioff;
                item = btrfs_item_nr(leaf, i);
 
-               if (!leaf->map_token) {
-                       map_extent_buffer(leaf, (unsigned long)item,
-                                       sizeof(struct btrfs_item),
-                                       &leaf->map_token, &leaf->kaddr,
-                                       &leaf->map_start, &leaf->map_len,
-                                       KM_USER1);
-               }
                ioff = btrfs_item_offset(leaf, item);
                btrfs_set_item_offset(leaf, item, ioff - data_size);
        }
 
-       if (leaf->map_token) {
-               unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-               leaf->map_token = NULL;
-       }
-
        /* shift the data */
        memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
                      data_end - data_size, btrfs_leaf_data(leaf) +
@@ -3494,27 +3485,13 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
                 * item0..itemN ... dataN.offset..dataN.size .. data0.size
                 */
                /* first correct the data pointers */
-               WARN_ON(leaf->map_token);
                for (i = slot; i < nritems; i++) {
                        u32 ioff;
 
                        item = btrfs_item_nr(leaf, i);
-                       if (!leaf->map_token) {
-                               map_extent_buffer(leaf, (unsigned long)item,
-                                       sizeof(struct btrfs_item),
-                                       &leaf->map_token, &leaf->kaddr,
-                                       &leaf->map_start, &leaf->map_len,
-                                       KM_USER1);
-                       }
-
                        ioff = btrfs_item_offset(leaf, item);
                        btrfs_set_item_offset(leaf, item, ioff - total_data);
                }
-               if (leaf->map_token) {
-                       unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-                       leaf->map_token = NULL;
-               }
-
                /* shift the items */
                memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
                              btrfs_item_nr_offset(slot),
@@ -3608,27 +3585,13 @@ int setup_items_for_insert(struct btrfs_trans_handle *trans,
                 * item0..itemN ... dataN.offset..dataN.size .. data0.size
                 */
                /* first correct the data pointers */
-               WARN_ON(leaf->map_token);
                for (i = slot; i < nritems; i++) {
                        u32 ioff;
 
                        item = btrfs_item_nr(leaf, i);
-                       if (!leaf->map_token) {
-                               map_extent_buffer(leaf, (unsigned long)item,
-                                       sizeof(struct btrfs_item),
-                                       &leaf->map_token, &leaf->kaddr,
-                                       &leaf->map_start, &leaf->map_len,
-                                       KM_USER1);
-                       }
-
                        ioff = btrfs_item_offset(leaf, item);
                        btrfs_set_item_offset(leaf, item, ioff - total_data);
                }
-               if (leaf->map_token) {
-                       unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-                       leaf->map_token = NULL;
-               }
-
                /* shift the items */
                memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
                              btrfs_item_nr_offset(slot),
@@ -3840,22 +3803,10 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                        u32 ioff;
 
                        item = btrfs_item_nr(leaf, i);
-                       if (!leaf->map_token) {
-                               map_extent_buffer(leaf, (unsigned long)item,
-                                       sizeof(struct btrfs_item),
-                                       &leaf->map_token, &leaf->kaddr,
-                                       &leaf->map_start, &leaf->map_len,
-                                       KM_USER1);
-                       }
                        ioff = btrfs_item_offset(leaf, item);
                        btrfs_set_item_offset(leaf, item, ioff + dsize);
                }
 
-               if (leaf->map_token) {
-                       unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-                       leaf->map_token = NULL;
-               }
-
                memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
                              btrfs_item_nr_offset(slot + nr),
                              sizeof(struct btrfs_item) *
@@ -4004,11 +3955,11 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
 
        WARN_ON(!path->keep_locks);
 again:
-       cur = btrfs_lock_root_node(root);
+       cur = btrfs_read_lock_root_node(root);
        level = btrfs_header_level(cur);
        WARN_ON(path->nodes[level]);
        path->nodes[level] = cur;
-       path->locks[level] = 1;
+       path->locks[level] = BTRFS_READ_LOCK;
 
        if (btrfs_header_generation(cur) < min_trans) {
                ret = 1;
@@ -4098,12 +4049,12 @@ find_next_key:
                cur = read_node_slot(root, cur, slot);
                BUG_ON(!cur);
 
-               btrfs_tree_lock(cur);
+               btrfs_tree_read_lock(cur);
 
-               path->locks[level - 1] = 1;
+               path->locks[level - 1] = BTRFS_READ_LOCK;
                path->nodes[level - 1] = cur;
                unlock_up(path, level, 1);
-               btrfs_clear_path_blocking(path, NULL);
+               btrfs_clear_path_blocking(path, NULL, 0);
        }
 out:
        if (ret == 0)
@@ -4218,30 +4169,21 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
        u32 nritems;
        int ret;
        int old_spinning = path->leave_spinning;
-       int force_blocking = 0;
+       int next_rw_lock = 0;
 
        nritems = btrfs_header_nritems(path->nodes[0]);
        if (nritems == 0)
                return 1;
 
-       /*
-        * we take the blocks in an order that upsets lockdep.  Using
-        * blocking mode is the only way around it.
-        */
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-       force_blocking = 1;
-#endif
-
        btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
 again:
        level = 1;
        next = NULL;
+       next_rw_lock = 0;
        btrfs_release_path(path);
 
        path->keep_locks = 1;
-
-       if (!force_blocking)
-               path->leave_spinning = 1;
+       path->leave_spinning = 1;
 
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        path->keep_locks = 0;
@@ -4281,11 +4223,12 @@ again:
                }
 
                if (next) {
-                       btrfs_tree_unlock(next);
+                       btrfs_tree_unlock_rw(next, next_rw_lock);
                        free_extent_buffer(next);
                }
 
                next = c;
+               next_rw_lock = path->locks[level];
                ret = read_block_for_search(NULL, root, path, &next, level,
                                            slot, &key);
                if (ret == -EAGAIN)
@@ -4297,15 +4240,14 @@ again:
                }
 
                if (!path->skip_locking) {
-                       ret = btrfs_try_spin_lock(next);
+                       ret = btrfs_try_tree_read_lock(next);
                        if (!ret) {
                                btrfs_set_path_blocking(path);
-                               btrfs_tree_lock(next);
-                               if (!force_blocking)
-                                       btrfs_clear_path_blocking(path, next);
+                               btrfs_tree_read_lock(next);
+                               btrfs_clear_path_blocking(path, next,
+                                                         BTRFS_READ_LOCK);
                        }
-                       if (force_blocking)
-                               btrfs_set_lock_blocking(next);
+                       next_rw_lock = BTRFS_READ_LOCK;
                }
                break;
        }
@@ -4314,14 +4256,13 @@ again:
                level--;
                c = path->nodes[level];
                if (path->locks[level])
-                       btrfs_tree_unlock(c);
+                       btrfs_tree_unlock_rw(c, path->locks[level]);
 
                free_extent_buffer(c);
                path->nodes[level] = next;
                path->slots[level] = 0;
                if (!path->skip_locking)
-                       path->locks[level] = 1;
-
+                       path->locks[level] = next_rw_lock;
                if (!level)
                        break;
 
@@ -4336,16 +4277,14 @@ again:
                }
 
                if (!path->skip_locking) {
-                       btrfs_assert_tree_locked(path->nodes[level]);
-                       ret = btrfs_try_spin_lock(next);
+                       ret = btrfs_try_tree_read_lock(next);
                        if (!ret) {
                                btrfs_set_path_blocking(path);
-                               btrfs_tree_lock(next);
-                               if (!force_blocking)
-                                       btrfs_clear_path_blocking(path, next);
+                               btrfs_tree_read_lock(next);
+                               btrfs_clear_path_blocking(path, next,
+                                                         BTRFS_READ_LOCK);
                        }
-                       if (force_blocking)
-                               btrfs_set_lock_blocking(next);
+                       next_rw_lock = BTRFS_READ_LOCK;
                }
        }
        ret = 0;
index fe9287b..365c4e1 100644 (file)
@@ -755,6 +755,8 @@ struct btrfs_space_info {
                                   chunks for this space */
        unsigned int chunk_alloc:1;     /* set if we are allocating a chunk */
 
+       unsigned int flush:1;           /* set if we are trying to make space */
+
        unsigned int force_alloc;       /* set if we need to force a chunk
                                           alloc for this space */
 
@@ -764,7 +766,7 @@ struct btrfs_space_info {
        struct list_head block_groups[BTRFS_NR_RAID_TYPES];
        spinlock_t lock;
        struct rw_semaphore groups_sem;
-       atomic_t caching_threads;
+       wait_queue_head_t wait;
 };
 
 struct btrfs_block_rsv {
@@ -824,6 +826,7 @@ struct btrfs_caching_control {
        struct list_head list;
        struct mutex mutex;
        wait_queue_head_t wait;
+       struct btrfs_work work;
        struct btrfs_block_group_cache *block_group;
        u64 progress;
        atomic_t count;
@@ -1032,6 +1035,8 @@ struct btrfs_fs_info {
        struct btrfs_workers endio_write_workers;
        struct btrfs_workers endio_freespace_worker;
        struct btrfs_workers submit_workers;
+       struct btrfs_workers caching_workers;
+
        /*
         * fixup workers take dirty pages that didn't properly go through
         * the cow mechanism and make them safe to write.  It happens
@@ -2128,7 +2133,7 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
 
 /* extent-tree.c */
 static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
-                                                int num_items)
+                                                unsigned num_items)
 {
        return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
                3 * num_items;
@@ -2222,9 +2227,6 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
 void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
 int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
-int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
-                               struct btrfs_root *root,
-                               int num_items);
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root);
 int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
@@ -2330,7 +2332,7 @@ struct btrfs_path *btrfs_alloc_path(void);
 void btrfs_free_path(struct btrfs_path *p);
 void btrfs_set_path_blocking(struct btrfs_path *p);
 void btrfs_clear_path_blocking(struct btrfs_path *p,
-                              struct extent_buffer *held);
+                              struct extent_buffer *held, int held_rw);
 void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
 
 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
index 98c68e6..b52c672 100644 (file)
@@ -735,7 +735,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
        }
 
        /* reset all the locked nodes in the patch to spinning locks. */
-       btrfs_clear_path_blocking(path, NULL);
+       btrfs_clear_path_blocking(path, NULL, 0);
 
        /* insert the keys of the items */
        ret = setup_items_for_insert(trans, root, path, keys, data_size,
index 685f259..c360a84 100644 (file)
@@ -89,13 +89,8 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
        data_size = sizeof(*dir_item) + name_len + data_len;
        dir_item = insert_with_overflow(trans, root, path, &key, data_size,
                                        name, name_len);
-       /*
-        * FIXME: at some point we should handle xattr's that are larger than
-        * what we can fit in our leaf.  We set location to NULL b/c we arent
-        * pointing at anything else, that will change if we store the xattr
-        * data in a separate inode.
-        */
-       BUG_ON(IS_ERR(dir_item));
+       if (IS_ERR(dir_item))
+               return PTR_ERR(dir_item);
        memset(&location, 0, sizeof(location));
 
        leaf = path->nodes[0];
index b231ae1..07b3ac6 100644 (file)
@@ -100,38 +100,83 @@ struct async_submit_bio {
        struct btrfs_work work;
 };
 
-/* These are used to set the lockdep class on the extent buffer locks.
- * The class is set by the readpage_end_io_hook after the buffer has
- * passed csum validation but before the pages are unlocked.
+/*
+ * Lockdep class keys for extent_buffer->lock's in this root.  For a given
+ * eb, the lockdep key is determined by the btrfs_root it belongs to and
+ * the level the eb occupies in the tree.
+ *
+ * Different roots are used for different purposes and may nest inside each
+ * other and they require separate keysets.  As lockdep keys should be
+ * static, assign keysets according to the purpose of the root as indicated
+ * by btrfs_root->objectid.  This ensures that all special purpose roots
+ * have separate keysets.
  *
- * The lockdep class is also set by btrfs_init_new_buffer on freshly
- * allocated blocks.
+ * Lock-nesting across peer nodes is always done with the immediate parent
+ * node locked thus preventing deadlock.  As lockdep doesn't know this, use
+ * subclass to avoid triggering lockdep warning in such cases.
  *
- * The class is based on the level in the tree block, which allows lockdep
- * to know that lower nodes nest inside the locks of higher nodes.
+ * The key is set by the readpage_end_io_hook after the buffer has passed
+ * csum validation but before the pages are unlocked.  It is also set by
+ * btrfs_init_new_buffer on freshly allocated blocks.
  *
- * We also add a check to make sure the highest level of the tree is
- * the same as our lockdep setup here.  If BTRFS_MAX_LEVEL changes, this
- * code needs update as well.
+ * We also add a check to make sure the highest level of the tree is the
+ * same as our lockdep setup here.  If BTRFS_MAX_LEVEL changes, this code
+ * needs update as well.
  */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # if BTRFS_MAX_LEVEL != 8
 #  error
 # endif
-static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1];
-static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = {
-       /* leaf */
-       "btrfs-extent-00",
-       "btrfs-extent-01",
-       "btrfs-extent-02",
-       "btrfs-extent-03",
-       "btrfs-extent-04",
-       "btrfs-extent-05",
-       "btrfs-extent-06",
-       "btrfs-extent-07",
-       /* highest possible level */
-       "btrfs-extent-08",
+
+static struct btrfs_lockdep_keyset {
+       u64                     id;             /* root objectid */
+       const char              *name_stem;     /* lock name stem */
+       char                    names[BTRFS_MAX_LEVEL + 1][20];
+       struct lock_class_key   keys[BTRFS_MAX_LEVEL + 1];
+} btrfs_lockdep_keysets[] = {
+       { .id = BTRFS_ROOT_TREE_OBJECTID,       .name_stem = "root"     },
+       { .id = BTRFS_EXTENT_TREE_OBJECTID,     .name_stem = "extent"   },
+       { .id = BTRFS_CHUNK_TREE_OBJECTID,      .name_stem = "chunk"    },
+       { .id = BTRFS_DEV_TREE_OBJECTID,        .name_stem = "dev"      },
+       { .id = BTRFS_FS_TREE_OBJECTID,         .name_stem = "fs"       },
+       { .id = BTRFS_CSUM_TREE_OBJECTID,       .name_stem = "csum"     },
+       { .id = BTRFS_ORPHAN_OBJECTID,          .name_stem = "orphan"   },
+       { .id = BTRFS_TREE_LOG_OBJECTID,        .name_stem = "log"      },
+       { .id = BTRFS_TREE_RELOC_OBJECTID,      .name_stem = "treloc"   },
+       { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc"   },
+       { .id = 0,                              .name_stem = "tree"     },
 };
+
+void __init btrfs_init_lockdep(void)
+{
+       int i, j;
+
+       /* initialize lockdep class names */
+       for (i = 0; i < ARRAY_SIZE(btrfs_lockdep_keysets); i++) {
+               struct btrfs_lockdep_keyset *ks = &btrfs_lockdep_keysets[i];
+
+               for (j = 0; j < ARRAY_SIZE(ks->names); j++)
+                       snprintf(ks->names[j], sizeof(ks->names[j]),
+                                "btrfs-%s-%02d", ks->name_stem, j);
+       }
+}
+
+void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
+                                   int level)
+{
+       struct btrfs_lockdep_keyset *ks;
+
+       BUG_ON(level >= ARRAY_SIZE(ks->keys));
+
+       /* find the matching keyset, id 0 is the default entry */
+       for (ks = btrfs_lockdep_keysets; ks->id; ks++)
+               if (ks->id == objectid)
+                       break;
+
+       lockdep_set_class_and_name(&eb->lock,
+                                  &ks->keys[level], ks->names[level]);
+}
+
 #endif
 
 /*
@@ -217,7 +262,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
        unsigned long len;
        unsigned long cur_len;
        unsigned long offset = BTRFS_CSUM_SIZE;
-       char *map_token = NULL;
        char *kaddr;
        unsigned long map_start;
        unsigned long map_len;
@@ -228,8 +272,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
        len = buf->len - offset;
        while (len > 0) {
                err = map_private_extent_buffer(buf, offset, 32,
-                                       &map_token, &kaddr,
-                                       &map_start, &map_len, KM_USER0);
+                                       &kaddr, &map_start, &map_len);
                if (err)
                        return 1;
                cur_len = min(len, map_len - (offset - map_start));
@@ -237,7 +280,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
                                      crc, cur_len);
                len -= cur_len;
                offset += cur_len;
-               unmap_extent_buffer(buf, map_token, KM_USER0);
        }
        if (csum_size > sizeof(inline_result)) {
                result = kzalloc(csum_size * sizeof(char), GFP_NOFS);
@@ -494,15 +536,6 @@ static noinline int check_leaf(struct btrfs_root *root,
        return 0;
 }
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
-{
-       lockdep_set_class_and_name(&eb->lock,
-                          &btrfs_eb_class[level],
-                          btrfs_eb_name[level]);
-}
-#endif
-
 static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
                               struct extent_state *state)
 {
@@ -553,7 +586,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
        }
        found_level = btrfs_header_level(eb);
 
-       btrfs_set_buffer_lockdep_class(eb, found_level);
+       btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
+                                      eb, found_level);
 
        ret = csum_tree_block(root, eb, 1);
        if (ret) {
@@ -1598,7 +1632,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
                goto fail_bdi;
        }
 
-       fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS;
+       mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
 
        INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
        INIT_LIST_HEAD(&fs_info->trans_list);
@@ -1802,6 +1836,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
                           fs_info->thread_pool_size),
                           &fs_info->generic_worker);
 
+       btrfs_init_workers(&fs_info->caching_workers, "cache",
+                          2, &fs_info->generic_worker);
+
        /* a higher idle thresh on the submit workers makes it much more
         * likely that bios will be send down in a sane order to the
         * devices
@@ -1855,6 +1892,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        btrfs_start_workers(&fs_info->endio_write_workers, 1);
        btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
        btrfs_start_workers(&fs_info->delayed_workers, 1);
+       btrfs_start_workers(&fs_info->caching_workers, 1);
 
        fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
        fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -2112,6 +2150,7 @@ fail_sb_buffer:
        btrfs_stop_workers(&fs_info->endio_freespace_worker);
        btrfs_stop_workers(&fs_info->submit_workers);
        btrfs_stop_workers(&fs_info->delayed_workers);
+       btrfs_stop_workers(&fs_info->caching_workers);
 fail_alloc:
        kfree(fs_info->delayed_root);
 fail_iput:
@@ -2577,6 +2616,7 @@ int close_ctree(struct btrfs_root *root)
        btrfs_stop_workers(&fs_info->endio_freespace_worker);
        btrfs_stop_workers(&fs_info->submit_workers);
        btrfs_stop_workers(&fs_info->delayed_workers);
+       btrfs_stop_workers(&fs_info->caching_workers);
 
        btrfs_close_devices(fs_info->fs_devices);
        btrfs_mapping_tree_free(&fs_info->mapping_tree);
index a0b610a..bec3ea4 100644 (file)
@@ -87,10 +87,14 @@ int btree_lock_page_hook(struct page *page);
 
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level);
+void btrfs_init_lockdep(void);
+void btrfs_set_buffer_lockdep_class(u64 objectid,
+                                   struct extent_buffer *eb, int level);
 #else
-static inline void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb,
-                                                int level)
+static inline void btrfs_init_lockdep(void)
+{ }
+static inline void btrfs_set_buffer_lockdep_class(u64 objectid,
+                                       struct extent_buffer *eb, int level)
 {
 }
 #endif
index 71cd456..4d08ed7 100644 (file)
@@ -320,12 +320,12 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
        return total_added;
 }
 
-static int caching_kthread(void *data)
+static noinline void caching_thread(struct btrfs_work *work)
 {
-       struct btrfs_block_group_cache *block_group = data;
-       struct btrfs_fs_info *fs_info = block_group->fs_info;
-       struct btrfs_caching_control *caching_ctl = block_group->caching_ctl;
-       struct btrfs_root *extent_root = fs_info->extent_root;
+       struct btrfs_block_group_cache *block_group;
+       struct btrfs_fs_info *fs_info;
+       struct btrfs_caching_control *caching_ctl;
+       struct btrfs_root *extent_root;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_key key;
@@ -334,9 +334,14 @@ static int caching_kthread(void *data)
        u32 nritems;
        int ret = 0;
 
+       caching_ctl = container_of(work, struct btrfs_caching_control, work);
+       block_group = caching_ctl->block_group;
+       fs_info = block_group->fs_info;
+       extent_root = fs_info->extent_root;
+
        path = btrfs_alloc_path();
        if (!path)
-               return -ENOMEM;
+               goto out;
 
        last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
 
@@ -433,13 +438,11 @@ err:
        free_excluded_extents(extent_root, block_group);
 
        mutex_unlock(&caching_ctl->mutex);
+out:
        wake_up(&caching_ctl->wait);
 
        put_caching_control(caching_ctl);
-       atomic_dec(&block_group->space_info->caching_threads);
        btrfs_put_block_group(block_group);
-
-       return 0;
 }
 
 static int cache_block_group(struct btrfs_block_group_cache *cache,
@@ -449,7 +452,6 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 {
        struct btrfs_fs_info *fs_info = cache->fs_info;
        struct btrfs_caching_control *caching_ctl;
-       struct task_struct *tsk;
        int ret = 0;
 
        smp_mb();
@@ -501,6 +503,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
        caching_ctl->progress = cache->key.objectid;
        /* one for caching kthread, one for caching block group list */
        atomic_set(&caching_ctl->count, 2);
+       caching_ctl->work.func = caching_thread;
 
        spin_lock(&cache->lock);
        if (cache->cached != BTRFS_CACHE_NO) {
@@ -516,16 +519,9 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
        list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
        up_write(&fs_info->extent_commit_sem);
 
-       atomic_inc(&cache->space_info->caching_threads);
        btrfs_get_block_group(cache);
 
-       tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
-                         cache->key.objectid);
-       if (IS_ERR(tsk)) {
-               ret = PTR_ERR(tsk);
-               printk(KERN_ERR "error running thread %d\n", ret);
-               BUG();
-       }
+       btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work);
 
        return ret;
 }
@@ -2932,9 +2928,10 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
        found->full = 0;
        found->force_alloc = CHUNK_ALLOC_NO_FORCE;
        found->chunk_alloc = 0;
+       found->flush = 0;
+       init_waitqueue_head(&found->wait);
        *space_info = found;
        list_add_rcu(&found->list, &info->space_info);
-       atomic_set(&found->caching_threads, 0);
        return 0;
 }
 
@@ -3314,6 +3311,14 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
        if (reserved == 0)
                return 0;
 
+       smp_mb();
+       if (root->fs_info->delalloc_bytes == 0) {
+               if (trans)
+                       return 0;
+               btrfs_wait_ordered_extents(root, 0, 0);
+               return 0;
+       }
+
        max_reclaim = min(reserved, to_reclaim);
 
        while (loops < 1024) {
@@ -3356,6 +3361,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
                }
 
        }
+       if (reclaimed >= to_reclaim && !trans)
+               btrfs_wait_ordered_extents(root, 0, 0);
        return reclaimed >= to_reclaim;
 }
 
@@ -3380,15 +3387,36 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
        u64 num_bytes = orig_bytes;
        int retries = 0;
        int ret = 0;
-       bool reserved = false;
        bool committed = false;
+       bool flushing = false;
 
 again:
-       ret = -ENOSPC;
-       if (reserved)
-               num_bytes = 0;
-
+       ret = 0;
        spin_lock(&space_info->lock);
+       /*
+        * We only want to wait if somebody other than us is flushing and we are
+        * actually allowed to flush.
+        */
+       while (flush && !flushing && space_info->flush) {
+               spin_unlock(&space_info->lock);
+               /*
+                * If we have a trans handle we can't wait because the flusher
+                * may have to commit the transaction, which would mean we would
+                * deadlock since we are waiting for the flusher to finish, but
+                * hold the current transaction open.
+                */
+               if (trans)
+                       return -EAGAIN;
+               ret = wait_event_interruptible(space_info->wait,
+                                              !space_info->flush);
+               /* Must have been interrupted, return */
+               if (ret)
+                       return -EINTR;
+
+               spin_lock(&space_info->lock);
+       }
+
+       ret = -ENOSPC;
        unused = space_info->bytes_used + space_info->bytes_reserved +
                 space_info->bytes_pinned + space_info->bytes_readonly +
                 space_info->bytes_may_use;
@@ -3403,8 +3431,7 @@ again:
        if (unused <= space_info->total_bytes) {
                unused = space_info->total_bytes - unused;
                if (unused >= num_bytes) {
-                       if (!reserved)
-                               space_info->bytes_reserved += orig_bytes;
+                       space_info->bytes_reserved += orig_bytes;
                        ret = 0;
                } else {
                        /*
@@ -3429,17 +3456,14 @@ again:
         * to reclaim space we can actually use it instead of somebody else
         * stealing it from us.
         */
-       if (ret && !reserved) {
-               space_info->bytes_reserved += orig_bytes;
-               reserved = true;
+       if (ret && flush) {
+               flushing = true;
+               space_info->flush = 1;
        }
 
        spin_unlock(&space_info->lock);
 
-       if (!ret)
-               return 0;
-
-       if (!flush)
+       if (!ret || !flush)
                goto out;
 
        /*
@@ -3447,11 +3471,11 @@ again:
         * metadata until after the IO is completed.
         */
        ret = shrink_delalloc(trans, root, num_bytes, 1);
-       if (ret > 0)
-               return 0;
-       else if (ret < 0)
+       if (ret < 0)
                goto out;
 
+       ret = 0;
+
        /*
         * So if we were overcommitted it's possible that somebody else flushed
         * out enough space and we simply didn't have enough space to reclaim,
@@ -3462,11 +3486,11 @@ again:
                goto again;
        }
 
-       spin_lock(&space_info->lock);
        /*
         * Not enough space to be reclaimed, don't bother committing the
         * transaction.
         */
+       spin_lock(&space_info->lock);
        if (space_info->bytes_pinned < orig_bytes)
                ret = -ENOSPC;
        spin_unlock(&space_info->lock);
@@ -3474,10 +3498,13 @@ again:
                goto out;
 
        ret = -EAGAIN;
-       if (trans || committed)
+       if (trans)
                goto out;
 
        ret = -ENOSPC;
+       if (committed)
+               goto out;
+
        trans = btrfs_join_transaction(root);
        if (IS_ERR(trans))
                goto out;
@@ -3489,12 +3516,12 @@ again:
        }
 
 out:
-       if (reserved) {
+       if (flushing) {
                spin_lock(&space_info->lock);
-               space_info->bytes_reserved -= orig_bytes;
+               space_info->flush = 0;
+               wake_up_all(&space_info->wait);
                spin_unlock(&space_info->lock);
        }
-
        return ret;
 }
 
@@ -3704,7 +3731,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
        if (commit_trans) {
                if (trans)
                        return -EAGAIN;
-
                trans = btrfs_join_transaction(root);
                BUG_ON(IS_ERR(trans));
                ret = btrfs_commit_transaction(trans, root);
@@ -3874,26 +3900,6 @@ int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
        return 0;
 }
 
-int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
-                                struct btrfs_root *root,
-                                int num_items)
-{
-       u64 num_bytes;
-       int ret;
-
-       if (num_items == 0 || root->fs_info->chunk_root == root)
-               return 0;
-
-       num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
-       ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
-                                 num_bytes);
-       if (!ret) {
-               trans->bytes_reserved += num_bytes;
-               trans->block_rsv = &root->fs_info->trans_block_rsv;
-       }
-       return ret;
-}
-
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root)
 {
@@ -3944,6 +3950,30 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
        return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
 }
 
+static unsigned drop_outstanding_extent(struct inode *inode)
+{
+       unsigned dropped_extents = 0;
+
+       spin_lock(&BTRFS_I(inode)->lock);
+       BUG_ON(!BTRFS_I(inode)->outstanding_extents);
+       BTRFS_I(inode)->outstanding_extents--;
+
+       /*
+        * If we have more or the same amount of outstanding extents than we have
+        * reserved then we need to leave the reserved extents count alone.
+        */
+       if (BTRFS_I(inode)->outstanding_extents >=
+           BTRFS_I(inode)->reserved_extents)
+               goto out;
+
+       dropped_extents = BTRFS_I(inode)->reserved_extents -
+               BTRFS_I(inode)->outstanding_extents;
+       BTRFS_I(inode)->reserved_extents -= dropped_extents;
+out:
+       spin_unlock(&BTRFS_I(inode)->lock);
+       return dropped_extents;
+}
+
 static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes)
 {
        return num_bytes >>= 3;
@@ -3953,9 +3983,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
-       u64 to_reserve;
-       int nr_extents;
-       int reserved_extents;
+       u64 to_reserve = 0;
+       unsigned nr_extents = 0;
        int ret;
 
        if (btrfs_transaction_in_commit(root->fs_info))
@@ -3963,66 +3992,49 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 
        num_bytes = ALIGN(num_bytes, root->sectorsize);
 
-       nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
-       reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
+       spin_lock(&BTRFS_I(inode)->lock);
+       BTRFS_I(inode)->outstanding_extents++;
+
+       if (BTRFS_I(inode)->outstanding_extents >
+           BTRFS_I(inode)->reserved_extents) {
+               nr_extents = BTRFS_I(inode)->outstanding_extents -
+                       BTRFS_I(inode)->reserved_extents;
+               BTRFS_I(inode)->reserved_extents += nr_extents;
 
-       if (nr_extents > reserved_extents) {
-               nr_extents -= reserved_extents;
                to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
-       } else {
-               nr_extents = 0;
-               to_reserve = 0;
        }
+       spin_unlock(&BTRFS_I(inode)->lock);
 
        to_reserve += calc_csum_metadata_size(inode, num_bytes);
        ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
-       if (ret)
+       if (ret) {
+               unsigned dropped;
+               /*
+                * We don't need the return value since our reservation failed,
+                * we just need to clean up our counter.
+                */
+               dropped = drop_outstanding_extent(inode);
+               WARN_ON(dropped > 1);
                return ret;
-
-       atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents);
-       atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+       }
 
        block_rsv_add_bytes(block_rsv, to_reserve, 1);
 
-       if (block_rsv->size > 512 * 1024 * 1024)
-               shrink_delalloc(NULL, root, to_reserve, 0);
-
        return 0;
 }
 
 void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       u64 to_free;
-       int nr_extents;
-       int reserved_extents;
+       u64 to_free = 0;
+       unsigned dropped;
 
        num_bytes = ALIGN(num_bytes, root->sectorsize);
-       atomic_dec(&BTRFS_I(inode)->outstanding_extents);
-       WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0);
-
-       reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
-       do {
-               int old, new;
-
-               nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
-               if (nr_extents >= reserved_extents) {
-                       nr_extents = 0;
-                       break;
-               }
-               old = reserved_extents;
-               nr_extents = reserved_extents - nr_extents;
-               new = reserved_extents - nr_extents;
-               old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents,
-                                    reserved_extents, new);
-               if (likely(old == reserved_extents))
-                       break;
-               reserved_extents = old;
-       } while (1);
+       dropped = drop_outstanding_extent(inode);
 
        to_free = calc_csum_metadata_size(inode, num_bytes);
-       if (nr_extents > 0)
-               to_free += btrfs_calc_trans_metadata_size(root, nr_extents);
+       if (dropped > 0)
+               to_free += btrfs_calc_trans_metadata_size(root, dropped);
 
        btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
                                to_free);
@@ -4990,14 +5002,10 @@ have_block_group:
                        }
 
                        /*
-                        * We only want to start kthread caching if we are at
-                        * the point where we will wait for caching to make
-                        * progress, or if our ideal search is over and we've
-                        * found somebody to start caching.
+                        * The caching workers are limited to 2 threads, so we
+                        * can queue as much work as we care to.
                         */
-                       if (loop > LOOP_CACHING_NOWAIT ||
-                           (loop > LOOP_FIND_IDEAL &&
-                            atomic_read(&space_info->caching_threads) < 2)) {
+                       if (loop > LOOP_FIND_IDEAL) {
                                ret = cache_block_group(block_group, trans,
                                                        orig_root, 0);
                                BUG_ON(ret);
@@ -5219,8 +5227,7 @@ loop:
                if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
                        found_uncached_bg = false;
                        loop++;
-                       if (!ideal_cache_percent &&
-                           atomic_read(&space_info->caching_threads))
+                       if (!ideal_cache_percent)
                                goto search;
 
                        /*
@@ -5623,7 +5630,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
        if (!buf)
                return ERR_PTR(-ENOMEM);
        btrfs_set_header_generation(buf, trans->transid);
-       btrfs_set_buffer_lockdep_class(buf, level);
+       btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
        btrfs_tree_lock(buf);
        clean_tree_block(trans, root, buf);
 
@@ -5910,7 +5917,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
                        return 1;
 
                if (path->locks[level] && !wc->keep_locks) {
-                       btrfs_tree_unlock(eb);
+                       btrfs_tree_unlock_rw(eb, path->locks[level]);
                        path->locks[level] = 0;
                }
                return 0;
@@ -5934,7 +5941,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
         * keep the tree lock
         */
        if (path->locks[level] && level > 0) {
-               btrfs_tree_unlock(eb);
+               btrfs_tree_unlock_rw(eb, path->locks[level]);
                path->locks[level] = 0;
        }
        return 0;
@@ -6047,7 +6054,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
        BUG_ON(level != btrfs_header_level(next));
        path->nodes[level] = next;
        path->slots[level] = 0;
-       path->locks[level] = 1;
+       path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
        wc->level = level;
        if (wc->level == 1)
                wc->reada_slot = 0;
@@ -6118,7 +6125,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                        BUG_ON(level == 0);
                        btrfs_tree_lock(eb);
                        btrfs_set_lock_blocking(eb);
-                       path->locks[level] = 1;
+                       path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
 
                        ret = btrfs_lookup_extent_info(trans, root,
                                                       eb->start, eb->len,
@@ -6127,8 +6134,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                        BUG_ON(ret);
                        BUG_ON(wc->refs[level] == 0);
                        if (wc->refs[level] == 1) {
-                               btrfs_tree_unlock(eb);
-                               path->locks[level] = 0;
+                               btrfs_tree_unlock_rw(eb, path->locks[level]);
                                return 1;
                        }
                }
@@ -6150,7 +6156,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                    btrfs_header_generation(eb) == trans->transid) {
                        btrfs_tree_lock(eb);
                        btrfs_set_lock_blocking(eb);
-                       path->locks[level] = 1;
+                       path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
                }
                clean_tree_block(trans, root, eb);
        }
@@ -6229,7 +6235,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
                                return 0;
 
                        if (path->locks[level]) {
-                               btrfs_tree_unlock(path->nodes[level]);
+                               btrfs_tree_unlock_rw(path->nodes[level],
+                                                    path->locks[level]);
                                path->locks[level] = 0;
                        }
                        free_extent_buffer(path->nodes[level]);
@@ -6281,7 +6288,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
                path->nodes[level] = btrfs_lock_root_node(root);
                btrfs_set_lock_blocking(path->nodes[level]);
                path->slots[level] = 0;
-               path->locks[level] = 1;
+               path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
                memset(&wc->update_progress, 0,
                       sizeof(wc->update_progress));
        } else {
@@ -6449,7 +6456,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
        level = btrfs_header_level(node);
        path->nodes[level] = node;
        path->slots[level] = 0;
-       path->locks[level] = 1;
+       path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
 
        wc->refs[parent_level] = 1;
        wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
@@ -6524,15 +6531,28 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
        return flags;
 }
 
-static int set_block_group_ro(struct btrfs_block_group_cache *cache)
+static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
 {
        struct btrfs_space_info *sinfo = cache->space_info;
        u64 num_bytes;
+       u64 min_allocable_bytes;
        int ret = -ENOSPC;
 
        if (cache->ro)
                return 0;
 
+       /*
+        * We need some metadata space and system metadata space for
+        * allocating chunks in some corner cases until we force to set
+        * it to be readonly.
+        */
+       if ((sinfo->flags &
+            (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
+           !force)
+               min_allocable_bytes = 1 * 1024 * 1024;
+       else
+               min_allocable_bytes = 0;
+
        spin_lock(&sinfo->lock);
        spin_lock(&cache->lock);
        num_bytes = cache->key.offset - cache->reserved - cache->pinned -
@@ -6540,7 +6560,8 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache)
 
        if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
            sinfo->bytes_may_use + sinfo->bytes_readonly +
-           cache->reserved_pinned + num_bytes <= sinfo->total_bytes) {
+           cache->reserved_pinned + num_bytes + min_allocable_bytes <=
+           sinfo->total_bytes) {
                sinfo->bytes_readonly += num_bytes;
                sinfo->bytes_reserved += cache->reserved_pinned;
                cache->reserved_pinned = 0;
@@ -6571,7 +6592,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
                do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
                               CHUNK_ALLOC_FORCE);
 
-       ret = set_block_group_ro(cache);
+       ret = set_block_group_ro(cache, 0);
        if (!ret)
                goto out;
        alloc_flags = get_alloc_profile(root, cache->space_info->flags);
@@ -6579,7 +6600,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
                             CHUNK_ALLOC_FORCE);
        if (ret < 0)
                goto out;
-       ret = set_block_group_ro(cache);
+       ret = set_block_group_ro(cache, 0);
 out:
        btrfs_end_transaction(trans, root);
        return ret;
@@ -7016,7 +7037,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 
                set_avail_alloc_bits(root->fs_info, cache->flags);
                if (btrfs_chunk_readonly(root, cache->key.objectid))
-                       set_block_group_ro(cache);
+                       set_block_group_ro(cache, 1);
        }
 
        list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
@@ -7030,9 +7051,9 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                 * mirrored block groups.
                 */
                list_for_each_entry(cache, &space_info->block_groups[3], list)
-                       set_block_group_ro(cache);
+                       set_block_group_ro(cache, 1);
                list_for_each_entry(cache, &space_info->block_groups[4], list)
-                       set_block_group_ro(cache);
+                       set_block_group_ro(cache, 1);
        }
 
        init_global_block_rsv(info);
index 561262d..067b174 100644 (file)
@@ -281,11 +281,10 @@ static int merge_state(struct extent_io_tree *tree,
                if (other->start == state->end + 1 &&
                    other->state == state->state) {
                        merge_cb(tree, state, other);
-                       other->start = state->start;
-                       state->tree = NULL;
-                       rb_erase(&state->rb_node, &tree->state);
-                       free_extent_state(state);
-                       state = NULL;
+                       state->end = other->end;
+                       other->tree = NULL;
+                       rb_erase(&other->rb_node, &tree->state);
+                       free_extent_state(other);
                }
        }
 
@@ -351,7 +350,6 @@ static int insert_state(struct extent_io_tree *tree,
                       "%llu %llu\n", (unsigned long long)found->start,
                       (unsigned long long)found->end,
                       (unsigned long long)start, (unsigned long long)end);
-               free_extent_state(state);
                return -EEXIST;
        }
        state->tree = tree;
@@ -500,7 +498,8 @@ again:
                        cached_state = NULL;
                }
 
-               if (cached && cached->tree && cached->start == start) {
+               if (cached && cached->tree && cached->start <= start &&
+                   cached->end > start) {
                        if (clear)
                                atomic_dec(&cached->refs);
                        state = cached;
@@ -742,7 +741,8 @@ again:
        spin_lock(&tree->lock);
        if (cached_state && *cached_state) {
                state = *cached_state;
-               if (state->start == start && state->tree) {
+               if (state->start <= start && state->end > start &&
+                   state->tree) {
                        node = &state->rb_node;
                        goto hit_next;
                }
@@ -783,13 +783,13 @@ hit_next:
                if (err)
                        goto out;
 
-               next_node = rb_next(node);
                cache_state(state, cached_state);
                merge_state(tree, state);
                if (last_end == (u64)-1)
                        goto out;
 
                start = last_end + 1;
+               next_node = rb_next(&state->rb_node);
                if (next_node && start < end && prealloc && !need_resched()) {
                        state = rb_entry(next_node, struct extent_state,
                                         rb_node);
@@ -862,7 +862,6 @@ hit_next:
                 * Avoid to free 'prealloc' if it can be merged with
                 * the later extent.
                 */
-               atomic_inc(&prealloc->refs);
                err = insert_state(tree, prealloc, start, this_end,
                                   &bits);
                BUG_ON(err == -EEXIST);
@@ -872,7 +871,6 @@ hit_next:
                        goto out;
                }
                cache_state(prealloc, cached_state);
-               free_extent_state(prealloc);
                prealloc = NULL;
                start = this_end + 1;
                goto search_again;
@@ -1564,7 +1562,8 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
        int bitset = 0;
 
        spin_lock(&tree->lock);
-       if (cached && cached->tree && cached->start == start)
+       if (cached && cached->tree && cached->start <= start &&
+           cached->end > start)
                node = &cached->rb_node;
        else
                node = tree_search(tree, start);
@@ -2432,6 +2431,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
        pgoff_t index;
        pgoff_t end;            /* Inclusive */
        int scanned = 0;
+       int tag;
 
        pagevec_init(&pvec, 0);
        if (wbc->range_cyclic) {
@@ -2442,11 +2442,16 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
                end = wbc->range_end >> PAGE_CACHE_SHIFT;
                scanned = 1;
        }
+       if (wbc->sync_mode == WB_SYNC_ALL)
+               tag = PAGECACHE_TAG_TOWRITE;
+       else
+               tag = PAGECACHE_TAG_DIRTY;
 retry:
+       if (wbc->sync_mode == WB_SYNC_ALL)
+               tag_pages_for_writeback(mapping, index, end);
        while (!done && !nr_to_write_done && (index <= end) &&
-              (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-                             PAGECACHE_TAG_DIRTY, min(end - index,
-                                 (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+              (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
+                       min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
                unsigned i;
 
                scanned = 1;
@@ -3020,8 +3025,15 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
                return NULL;
        eb->start = start;
        eb->len = len;
-       spin_lock_init(&eb->lock);
-       init_waitqueue_head(&eb->lock_wq);
+       rwlock_init(&eb->lock);
+       atomic_set(&eb->write_locks, 0);
+       atomic_set(&eb->read_locks, 0);
+       atomic_set(&eb->blocking_readers, 0);
+       atomic_set(&eb->blocking_writers, 0);
+       atomic_set(&eb->spinning_readers, 0);
+       atomic_set(&eb->spinning_writers, 0);
+       init_waitqueue_head(&eb->write_lock_wq);
+       init_waitqueue_head(&eb->read_lock_wq);
 
 #if LEAK_DEBUG
        spin_lock_irqsave(&leak_lock, flags);
@@ -3117,7 +3129,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
                i = 0;
        }
        for (; i < num_pages; i++, index++) {
-               p = find_or_create_page(mapping, index, GFP_NOFS | __GFP_HIGHMEM);
+               p = find_or_create_page(mapping, index, GFP_NOFS);
                if (!p) {
                        WARN_ON(1);
                        goto free_eb;
@@ -3264,6 +3276,22 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree,
        return was_dirty;
 }
 
+static int __eb_straddles_pages(u64 start, u64 len)
+{
+       if (len < PAGE_CACHE_SIZE)
+               return 1;
+       if (start & (PAGE_CACHE_SIZE - 1))
+               return 1;
+       if ((start + len) & (PAGE_CACHE_SIZE - 1))
+               return 1;
+       return 0;
+}
+
+static int eb_straddles_pages(struct extent_buffer *eb)
+{
+       return __eb_straddles_pages(eb->start, eb->len);
+}
+
 int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
                                struct extent_buffer *eb,
                                struct extent_state **cached_state)
@@ -3275,8 +3303,10 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
        num_pages = num_extent_pages(eb->start, eb->len);
        clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
 
-       clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
-                             cached_state, GFP_NOFS);
+       if (eb_straddles_pages(eb)) {
+               clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+                                     cached_state, GFP_NOFS);
+       }
        for (i = 0; i < num_pages; i++) {
                page = extent_buffer_page(eb, i);
                if (page)
@@ -3294,8 +3324,10 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree,
 
        num_pages = num_extent_pages(eb->start, eb->len);
 
-       set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
-                           NULL, GFP_NOFS);
+       if (eb_straddles_pages(eb)) {
+               set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+                                   NULL, GFP_NOFS);
+       }
        for (i = 0; i < num_pages; i++) {
                page = extent_buffer_page(eb, i);
                if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
@@ -3318,9 +3350,12 @@ int extent_range_uptodate(struct extent_io_tree *tree,
        int uptodate;
        unsigned long index;
 
-       ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL);
-       if (ret)
-               return 1;
+       if (__eb_straddles_pages(start, end - start + 1)) {
+               ret = test_range_bit(tree, start, end,
+                                    EXTENT_UPTODATE, 1, NULL);
+               if (ret)
+                       return 1;
+       }
        while (start <= end) {
                index = start >> PAGE_CACHE_SHIFT;
                page = find_get_page(tree->mapping, index);
@@ -3348,10 +3383,12 @@ int extent_buffer_uptodate(struct extent_io_tree *tree,
        if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
                return 1;
 
-       ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
-                          EXTENT_UPTODATE, 1, cached_state);
-       if (ret)
-               return ret;
+       if (eb_straddles_pages(eb)) {
+               ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+                                  EXTENT_UPTODATE, 1, cached_state);
+               if (ret)
+                       return ret;
+       }
 
        num_pages = num_extent_pages(eb->start, eb->len);
        for (i = 0; i < num_pages; i++) {
@@ -3384,9 +3421,11 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
        if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
                return 0;
 
-       if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
-                          EXTENT_UPTODATE, 1, NULL)) {
-               return 0;
+       if (eb_straddles_pages(eb)) {
+               if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+                                  EXTENT_UPTODATE, 1, NULL)) {
+                       return 0;
+               }
        }
 
        if (start) {
@@ -3490,9 +3529,8 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
                page = extent_buffer_page(eb, i);
 
                cur = min(len, (PAGE_CACHE_SIZE - offset));
-               kaddr = kmap_atomic(page, KM_USER1);
+               kaddr = page_address(page);
                memcpy(dst, kaddr + offset, cur);
-               kunmap_atomic(kaddr, KM_USER1);
 
                dst += cur;
                len -= cur;
@@ -3502,9 +3540,9 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
 }
 
 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
-                              unsigned long min_len, char **token, char **map,
+                              unsigned long min_len, char **map,
                               unsigned long *map_start,
-                              unsigned long *map_len, int km)
+                              unsigned long *map_len)
 {
        size_t offset = start & (PAGE_CACHE_SIZE - 1);
        char *kaddr;
@@ -3534,42 +3572,12 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
        }
 
        p = extent_buffer_page(eb, i);
-       kaddr = kmap_atomic(p, km);
-       *token = kaddr;
+       kaddr = page_address(p);
        *map = kaddr + offset;
        *map_len = PAGE_CACHE_SIZE - offset;
        return 0;
 }
 
-int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
-                     unsigned long min_len,
-                     char **token, char **map,
-                     unsigned long *map_start,
-                     unsigned long *map_len, int km)
-{
-       int err;
-       int save = 0;
-       if (eb->map_token) {
-               unmap_extent_buffer(eb, eb->map_token, km);
-               eb->map_token = NULL;
-               save = 1;
-       }
-       err = map_private_extent_buffer(eb, start, min_len, token, map,
-                                      map_start, map_len, km);
-       if (!err && save) {
-               eb->map_token = *token;
-               eb->kaddr = *map;
-               eb->map_start = *map_start;
-               eb->map_len = *map_len;
-       }
-       return err;
-}
-
-void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
-{
-       kunmap_atomic(token, km);
-}
-
 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
                          unsigned long start,
                          unsigned long len)
@@ -3593,9 +3601,8 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
 
                cur = min(len, (PAGE_CACHE_SIZE - offset));
 
-               kaddr = kmap_atomic(page, KM_USER0);
+               kaddr = page_address(page);
                ret = memcmp(ptr, kaddr + offset, cur);
-               kunmap_atomic(kaddr, KM_USER0);
                if (ret)
                        break;
 
@@ -3628,9 +3635,8 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
                WARN_ON(!PageUptodate(page));
 
                cur = min(len, PAGE_CACHE_SIZE - offset);
-               kaddr = kmap_atomic(page, KM_USER1);
+               kaddr = page_address(page);
                memcpy(kaddr + offset, src, cur);
-               kunmap_atomic(kaddr, KM_USER1);
 
                src += cur;
                len -= cur;
@@ -3659,9 +3665,8 @@ void memset_extent_buffer(struct extent_buffer *eb, char c,
                WARN_ON(!PageUptodate(page));
 
                cur = min(len, PAGE_CACHE_SIZE - offset);
-               kaddr = kmap_atomic(page, KM_USER0);
+               kaddr = page_address(page);
                memset(kaddr + offset, c, cur);
-               kunmap_atomic(kaddr, KM_USER0);
 
                len -= cur;
                offset = 0;
@@ -3692,9 +3697,8 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
 
                cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
 
-               kaddr = kmap_atomic(page, KM_USER0);
+               kaddr = page_address(page);
                read_extent_buffer(src, kaddr + offset, src_offset, cur);
-               kunmap_atomic(kaddr, KM_USER0);
 
                src_offset += cur;
                len -= cur;
@@ -3707,20 +3711,17 @@ static void move_pages(struct page *dst_page, struct page *src_page,
                       unsigned long dst_off, unsigned long src_off,
                       unsigned long len)
 {
-       char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
+       char *dst_kaddr = page_address(dst_page);
        if (dst_page == src_page) {
                memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
        } else {
-               char *src_kaddr = kmap_atomic(src_page, KM_USER1);
+               char *src_kaddr = page_address(src_page);
                char *p = dst_kaddr + dst_off + len;
                char *s = src_kaddr + src_off + len;
 
                while (len--)
                        *--p = *--s;
-
-               kunmap_atomic(src_kaddr, KM_USER1);
        }
-       kunmap_atomic(dst_kaddr, KM_USER0);
 }
 
 static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
@@ -3733,20 +3734,17 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
                       unsigned long dst_off, unsigned long src_off,
                       unsigned long len)
 {
-       char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
+       char *dst_kaddr = page_address(dst_page);
        char *src_kaddr;
 
        if (dst_page != src_page) {
-               src_kaddr = kmap_atomic(src_page, KM_USER1);
+               src_kaddr = page_address(src_page);
        } else {
                src_kaddr = dst_kaddr;
                BUG_ON(areas_overlap(src_off, dst_off, len));
        }
 
        memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
-       kunmap_atomic(dst_kaddr, KM_USER0);
-       if (dst_page != src_page)
-               kunmap_atomic(src_kaddr, KM_USER1);
 }
 
 void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
index a11a92e..21a7ca9 100644 (file)
@@ -120,8 +120,6 @@ struct extent_state {
 struct extent_buffer {
        u64 start;
        unsigned long len;
-       char *map_token;
-       char *kaddr;
        unsigned long map_start;
        unsigned long map_len;
        struct page *first_page;
@@ -130,14 +128,26 @@ struct extent_buffer {
        struct rcu_head rcu_head;
        atomic_t refs;
 
-       /* the spinlock is used to protect most operations */
-       spinlock_t lock;
+       /* lock-state counters for the extent buffer */
+       atomic_t write_locks;
+       atomic_t read_locks;
+       atomic_t blocking_writers;
+       atomic_t blocking_readers;
+       atomic_t spinning_readers;
+       atomic_t spinning_writers;
+
+       /* protects write locks */
+       rwlock_t lock;
 
-       /*
-        * when we keep the lock held while blocking, waiters go onto
-        * the wq
+       /* readers wait on write_lock_wq for the write
+        * lock holders to unlock
         */
-       wait_queue_head_t lock_wq;
+       wait_queue_head_t write_lock_wq;
+
+       /* writers use read_lock_wq while they wait for readers
+        * to unlock
+        */
+       wait_queue_head_t read_lock_wq;
 };
 
 static inline void extent_set_compress_type(unsigned long *bio_flags,
@@ -279,15 +289,10 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
 int extent_buffer_uptodate(struct extent_io_tree *tree,
                           struct extent_buffer *eb,
                           struct extent_state *cached_state);
-int map_extent_buffer(struct extent_buffer *eb, unsigned long offset,
-                     unsigned long min_len, char **token, char **map,
-                     unsigned long *map_start,
-                     unsigned long *map_len, int km);
 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
-                     unsigned long min_len, char **token, char **map,
+                     unsigned long min_len, char **map,
                      unsigned long *map_start,
-                     unsigned long *map_len, int km);
-void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km);
+                     unsigned long *map_len);
 int extent_range_uptodate(struct extent_io_tree *tree,
                          u64 start, u64 end);
 int extent_clear_unlock_delalloc(struct inode *inode,
index 90d4ee5..08bcfa9 100644 (file)
@@ -177,6 +177,15 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
 
        WARN_ON(bio->bi_vcnt <= 0);
 
+       /*
+        * the free space stuff is only read when it hasn't been
+        * updated in the current transaction.  So, we can safely
+        * read from the commit root and sidestep a nasty deadlock
+        * between reading the free space cache and updating the csum tree.
+        */
+       if (btrfs_is_free_space_inode(root, inode))
+               path->search_commit_root = 1;
+
        disk_bytenr = (u64)bio->bi_sector << 9;
        if (dio)
                offset = logical_offset;
@@ -664,10 +673,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
        struct btrfs_sector_sum *sector_sum;
        u32 nritems;
        u32 ins_size;
-       char *eb_map;
-       char *eb_token;
-       unsigned long map_len;
-       unsigned long map_start;
        u16 csum_size =
                btrfs_super_csum_size(&root->fs_info->super_copy);
 
@@ -814,30 +819,9 @@ found:
        item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
        item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
                                      btrfs_item_size_nr(leaf, path->slots[0]));
-       eb_token = NULL;
 next_sector:
 
-       if (!eb_token ||
-          (unsigned long)item + csum_size >= map_start + map_len) {
-               int err;
-
-               if (eb_token)
-                       unmap_extent_buffer(leaf, eb_token, KM_USER1);
-               eb_token = NULL;
-               err = map_private_extent_buffer(leaf, (unsigned long)item,
-                                               csum_size,
-                                               &eb_token, &eb_map,
-                                               &map_start, &map_len, KM_USER1);
-               if (err)
-                       eb_token = NULL;
-       }
-       if (eb_token) {
-               memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)),
-                      &sector_sum->sum, csum_size);
-       } else {
-               write_extent_buffer(leaf, &sector_sum->sum,
-                                   (unsigned long)item, csum_size);
-       }
+       write_extent_buffer(leaf, &sector_sum->sum, (unsigned long)item, csum_size);
 
        total_bytes += root->sectorsize;
        sector_sum++;
@@ -850,10 +834,7 @@ next_sector:
                        goto next_sector;
                }
        }
-       if (eb_token) {
-               unmap_extent_buffer(leaf, eb_token, KM_USER1);
-               eb_token = NULL;
-       }
+
        btrfs_mark_buffer_dirty(path->nodes[0]);
        if (total_bytes < sums->len) {
                btrfs_release_path(path);
index 59cbdb1..a35e51c 100644 (file)
@@ -1081,7 +1081,8 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
 
 again:
        for (i = 0; i < num_pages; i++) {
-               pages[i] = grab_cache_page(inode->i_mapping, index + i);
+               pages[i] = find_or_create_page(inode->i_mapping, index + i,
+                                              GFP_NOFS);
                if (!pages[i]) {
                        faili = i - 1;
                        err = -ENOMEM;
@@ -1238,9 +1239,11 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
                 * managed to copy.
                 */
                if (num_pages > dirty_pages) {
-                       if (copied > 0)
-                               atomic_inc(
-                                       &BTRFS_I(inode)->outstanding_extents);
+                       if (copied > 0) {
+                               spin_lock(&BTRFS_I(inode)->lock);
+                               BTRFS_I(inode)->outstanding_extents++;
+                               spin_unlock(&BTRFS_I(inode)->lock);
+                       }
                        btrfs_delalloc_release_space(inode,
                                        (num_pages - dirty_pages) <<
                                        PAGE_CACHE_SHIFT);
index bf0d615..6377713 100644 (file)
@@ -98,6 +98,12 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
                return inode;
 
        spin_lock(&block_group->lock);
+       if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) {
+               printk(KERN_INFO "Old style space inode found, converting.\n");
+               BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODATASUM;
+               block_group->disk_cache_state = BTRFS_DC_CLEAR;
+       }
+
        if (!btrfs_fs_closing(root->fs_info)) {
                block_group->inode = igrab(inode);
                block_group->iref = 1;
@@ -135,7 +141,7 @@ int __create_free_space_inode(struct btrfs_root *root,
        btrfs_set_inode_gid(leaf, inode_item, 0);
        btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
        btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
-                             BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
+                             BTRFS_INODE_PREALLOC);
        btrfs_set_inode_nlink(leaf, inode_item, 1);
        btrfs_set_inode_transid(leaf, inode_item, trans->transid);
        btrfs_set_inode_block_group(leaf, inode_item, offset);
@@ -239,17 +245,12 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
        struct btrfs_free_space_header *header;
        struct extent_buffer *leaf;
        struct page *page;
-       u32 *checksums = NULL, *crc;
-       char *disk_crcs = NULL;
        struct btrfs_key key;
        struct list_head bitmaps;
        u64 num_entries;
        u64 num_bitmaps;
        u64 generation;
-       u32 cur_crc = ~(u32)0;
        pgoff_t index = 0;
-       unsigned long first_page_offset;
-       int num_checksums;
        int ret = 0;
 
        INIT_LIST_HEAD(&bitmaps);
@@ -292,16 +293,6 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
        if (!num_entries)
                goto out;
 
-       /* Setup everything for doing checksumming */
-       num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
-       checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
-       if (!checksums)
-               goto out;
-       first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
-       disk_crcs = kzalloc(first_page_offset, GFP_NOFS);
-       if (!disk_crcs)
-               goto out;
-
        ret = readahead_cache(inode);
        if (ret)
                goto out;
@@ -311,18 +302,12 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
                struct btrfs_free_space *e;
                void *addr;
                unsigned long offset = 0;
-               unsigned long start_offset = 0;
                int need_loop = 0;
 
                if (!num_entries && !num_bitmaps)
                        break;
 
-               if (index == 0) {
-                       start_offset = first_page_offset;
-                       offset = start_offset;
-               }
-
-               page = grab_cache_page(inode->i_mapping, index);
+               page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
                if (!page)
                        goto free_cache;
 
@@ -342,8 +327,15 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
                if (index == 0) {
                        u64 *gen;
 
-                       memcpy(disk_crcs, addr, first_page_offset);
-                       gen = addr + (sizeof(u32) * num_checksums);
+                       /*
+                        * We put a bogus crc in the front of the first page in
+                        * case old kernels try to mount a fs with the new
+                        * format to make sure they discard the cache.
+                        */
+                       addr += sizeof(u64);
+                       offset += sizeof(u64);
+
+                       gen = addr;
                        if (*gen != BTRFS_I(inode)->generation) {
                                printk(KERN_ERR "btrfs: space cache generation"
                                       " (%llu) does not match inode (%llu)\n",
@@ -355,24 +347,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
                                page_cache_release(page);
                                goto free_cache;
                        }
-                       crc = (u32 *)disk_crcs;
-               }
-               entry = addr + start_offset;
-
-               /* First lets check our crc before we do anything fun */
-               cur_crc = ~(u32)0;
-               cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc,
-                                         PAGE_CACHE_SIZE - start_offset);
-               btrfs_csum_final(cur_crc, (char *)&cur_crc);
-               if (cur_crc != *crc) {
-                       printk(KERN_ERR "btrfs: crc mismatch for page %lu\n",
-                              index);
-                       kunmap(page);
-                       unlock_page(page);
-                       page_cache_release(page);
-                       goto free_cache;
+                       addr += sizeof(u64);
+                       offset += sizeof(u64);
                }
-               crc++;
+               entry = addr;
 
                while (1) {
                        if (!num_entries)
@@ -470,8 +448,6 @@ next:
 
        ret = 1;
 out:
-       kfree(checksums);
-       kfree(disk_crcs);
        return ret;
 free_cache:
        __btrfs_remove_free_space_cache(ctl);
@@ -569,8 +545,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
        struct btrfs_key key;
        u64 start, end, len;
        u64 bytes = 0;
-       u32 *crc, *checksums;
-       unsigned long first_page_offset;
+       u32 crc = ~(u32)0;
        int index = 0, num_pages = 0;
        int entries = 0;
        int bitmaps = 0;
@@ -590,34 +565,13 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
        num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
                PAGE_CACHE_SHIFT;
 
-       /* Since the first page has all of our checksums and our generation we
-        * need to calculate the offset into the page that we can start writing
-        * our entries.
-        */
-       first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
-
        filemap_write_and_wait(inode->i_mapping);
        btrfs_wait_ordered_range(inode, inode->i_size &
                                 ~(root->sectorsize - 1), (u64)-1);
 
-       /* make sure we don't overflow that first page */
-       if (first_page_offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) {
-               /* this is really the same as running out of space, where we also return 0 */
-               printk(KERN_CRIT "Btrfs: free space cache was too big for the crc page\n");
-               ret = 0;
-               goto out_update;
-       }
-
-       /* We need a checksum per page. */
-       crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
-       if (!crc)
-               return -1;
-
        pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
-       if (!pages) {
-               kfree(crc);
+       if (!pages)
                return -1;
-       }
 
        /* Get the cluster for this block_group if it exists */
        if (block_group && !list_empty(&block_group->cluster_list))
@@ -640,7 +594,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
         * know and don't freak out.
         */
        while (index < num_pages) {
-               page = grab_cache_page(inode->i_mapping, index);
+               page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
                if (!page) {
                        int i;
 
@@ -648,7 +602,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
                                unlock_page(pages[i]);
                                page_cache_release(pages[i]);
                        }
-                       goto out_free;
+                       goto out;
                }
                pages[index] = page;
                index++;
@@ -668,17 +622,11 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
        /* Write out the extent entries */
        do {
                struct btrfs_free_space_entry *entry;
-               void *addr;
+               void *addr, *orig;
                unsigned long offset = 0;
-               unsigned long start_offset = 0;
 
                next_page = false;
 
-               if (index == 0) {
-                       start_offset = first_page_offset;
-                       offset = start_offset;
-               }
-
                if (index >= num_pages) {
                        out_of_space = true;
                        break;
@@ -686,10 +634,26 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
 
                page = pages[index];
 
-               addr = kmap(page);
-               entry = addr + start_offset;
+               orig = addr = kmap(page);
+               if (index == 0) {
+                       u64 *gen;
 
-               memset(addr, 0, PAGE_CACHE_SIZE);
+                       /*
+                        * We're going to put in a bogus crc for this page to
+                        * make sure that old kernels who aren't aware of this
+                        * format will be sure to discard the cache.
+                        */
+                       addr += sizeof(u64);
+                       offset += sizeof(u64);
+
+                       gen = addr;
+                       *gen = trans->transid;
+                       addr += sizeof(u64);
+                       offset += sizeof(u64);
+               }
+               entry = addr;
+
+               memset(addr, 0, PAGE_CACHE_SIZE - offset);
                while (node && !next_page) {
                        struct btrfs_free_space *e;
 
@@ -752,13 +716,19 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
                                next_page = true;
                        entry++;
                }
-               *crc = ~(u32)0;
-               *crc = btrfs_csum_data(root, addr + start_offset, *crc,
-                                      PAGE_CACHE_SIZE - start_offset);
-               kunmap(page);
 
-               btrfs_csum_final(*crc, (char *)crc);
-               crc++;
+               /* Generate bogus crc value */
+               if (index == 0) {
+                       u32 *tmp;
+                       crc = btrfs_csum_data(root, orig + sizeof(u64), crc,
+                                             PAGE_CACHE_SIZE - sizeof(u64));
+                       btrfs_csum_final(crc, (char *)&crc);
+                       crc++;
+                       tmp = orig;
+                       *tmp = crc;
+               }
+
+               kunmap(page);
 
                bytes += PAGE_CACHE_SIZE;
 
@@ -779,11 +749,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
 
                addr = kmap(page);
                memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
-               *crc = ~(u32)0;
-               *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
                kunmap(page);
-               btrfs_csum_final(*crc, (char *)crc);
-               crc++;
                bytes += PAGE_CACHE_SIZE;
 
                list_del_init(&entry->list);
@@ -796,7 +762,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
                                     i_size_read(inode) - 1, &cached_state,
                                     GFP_NOFS);
                ret = 0;
-               goto out_free;
+               goto out;
        }
 
        /* Zero out the rest of the pages just to make sure */
@@ -811,20 +777,6 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
                index++;
        }
 
-       /* Write the checksums and trans id to the first page */
-       {
-               void *addr;
-               u64 *gen;
-
-               page = pages[0];
-
-               addr = kmap(page);
-               memcpy(addr, checksums, sizeof(u32) * num_pages);
-               gen = addr + (sizeof(u32) * num_pages);
-               *gen = trans->transid;
-               kunmap(page);
-       }
-
        ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
                                            bytes, &cached_state);
        btrfs_drop_pages(pages, num_pages);
@@ -833,7 +785,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
 
        if (ret) {
                ret = 0;
-               goto out_free;
+               goto out;
        }
 
        BTRFS_I(inode)->generation = trans->transid;
@@ -850,7 +802,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
                clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
                                 EXTENT_DIRTY | EXTENT_DELALLOC |
                                 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
-               goto out_free;
+               goto out;
        }
        leaf = path->nodes[0];
        if (ret > 0) {
@@ -866,7 +818,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
                                         EXTENT_DO_ACCOUNTING, 0, 0, NULL,
                                         GFP_NOFS);
                        btrfs_release_path(path);
-                       goto out_free;
+                       goto out;
                }
        }
        header = btrfs_item_ptr(leaf, path->slots[0],
@@ -879,11 +831,8 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
 
        ret = 1;
 
-out_free:
-       kfree(checksums);
+out:
        kfree(pages);
-
-out_update:
        if (ret != 1) {
                invalidate_inode_pages2_range(inode->i_mapping, 0, index);
                BTRFS_I(inode)->generation = 0;
index e91b097..13e6255 100644 (file)
@@ -750,15 +750,6 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
        return alloc_hint;
 }
 
-static inline bool is_free_space_inode(struct btrfs_root *root,
-                                      struct inode *inode)
-{
-       if (root == root->fs_info->tree_root ||
-           BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
-               return true;
-       return false;
-}
-
 /*
  * when extent_io.c finds a delayed allocation range in the file,
  * the call backs end up in this code.  The basic idea is to
@@ -791,7 +782,7 @@ static noinline int cow_file_range(struct inode *inode,
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        int ret = 0;
 
-       BUG_ON(is_free_space_inode(root, inode));
+       BUG_ON(btrfs_is_free_space_inode(root, inode));
        trans = btrfs_join_transaction(root);
        BUG_ON(IS_ERR(trans));
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -1072,7 +1063,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
        path = btrfs_alloc_path();
        BUG_ON(!path);
 
-       nolock = is_free_space_inode(root, inode);
+       nolock = btrfs_is_free_space_inode(root, inode);
 
        if (nolock)
                trans = btrfs_join_transaction_nolock(root);
@@ -1298,7 +1289,9 @@ static int btrfs_split_extent_hook(struct inode *inode,
        if (!(orig->state & EXTENT_DELALLOC))
                return 0;
 
-       atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+       spin_lock(&BTRFS_I(inode)->lock);
+       BTRFS_I(inode)->outstanding_extents++;
+       spin_unlock(&BTRFS_I(inode)->lock);
        return 0;
 }
 
@@ -1316,7 +1309,9 @@ static int btrfs_merge_extent_hook(struct inode *inode,
        if (!(other->state & EXTENT_DELALLOC))
                return 0;
 
-       atomic_dec(&BTRFS_I(inode)->outstanding_extents);
+       spin_lock(&BTRFS_I(inode)->lock);
+       BTRFS_I(inode)->outstanding_extents--;
+       spin_unlock(&BTRFS_I(inode)->lock);
        return 0;
 }
 
@@ -1337,12 +1332,15 @@ static int btrfs_set_bit_hook(struct inode *inode,
        if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
                struct btrfs_root *root = BTRFS_I(inode)->root;
                u64 len = state->end + 1 - state->start;
-               bool do_list = !is_free_space_inode(root, inode);
+               bool do_list = !btrfs_is_free_space_inode(root, inode);
 
-               if (*bits & EXTENT_FIRST_DELALLOC)
+               if (*bits & EXTENT_FIRST_DELALLOC) {
                        *bits &= ~EXTENT_FIRST_DELALLOC;
-               else
-                       atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+               } else {
+                       spin_lock(&BTRFS_I(inode)->lock);
+                       BTRFS_I(inode)->outstanding_extents++;
+                       spin_unlock(&BTRFS_I(inode)->lock);
+               }
 
                spin_lock(&root->fs_info->delalloc_lock);
                BTRFS_I(inode)->delalloc_bytes += len;
@@ -1370,12 +1368,15 @@ static int btrfs_clear_bit_hook(struct inode *inode,
        if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
                struct btrfs_root *root = BTRFS_I(inode)->root;
                u64 len = state->end + 1 - state->start;
-               bool do_list = !is_free_space_inode(root, inode);
+               bool do_list = !btrfs_is_free_space_inode(root, inode);
 
-               if (*bits & EXTENT_FIRST_DELALLOC)
+               if (*bits & EXTENT_FIRST_DELALLOC) {
                        *bits &= ~EXTENT_FIRST_DELALLOC;
-               else if (!(*bits & EXTENT_DO_ACCOUNTING))
-                       atomic_dec(&BTRFS_I(inode)->outstanding_extents);
+               } else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
+                       spin_lock(&BTRFS_I(inode)->lock);
+                       BTRFS_I(inode)->outstanding_extents--;
+                       spin_unlock(&BTRFS_I(inode)->lock);
+               }
 
                if (*bits & EXTENT_DO_ACCOUNTING)
                        btrfs_delalloc_release_metadata(inode, len);
@@ -1477,7 +1478,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 
        skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
-       if (is_free_space_inode(root, inode))
+       if (btrfs_is_free_space_inode(root, inode))
                ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
        else
                ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
@@ -1726,7 +1727,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
                return 0;
        BUG_ON(!ordered_extent);
 
-       nolock = is_free_space_inode(root, inode);
+       nolock = btrfs_is_free_space_inode(root, inode);
 
        if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
                BUG_ON(!list_empty(&ordered_extent->list));
@@ -2531,13 +2532,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
 
        inode_item = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_inode_item);
-       if (!leaf->map_token)
-               map_private_extent_buffer(leaf, (unsigned long)inode_item,
-                                         sizeof(struct btrfs_inode_item),
-                                         &leaf->map_token, &leaf->kaddr,
-                                         &leaf->map_start, &leaf->map_len,
-                                         KM_USER1);
-
        inode->i_mode = btrfs_inode_mode(leaf, inode_item);
        inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
        inode->i_uid = btrfs_inode_uid(leaf, inode_item);
@@ -2575,11 +2569,6 @@ cache_acl:
        if (!maybe_acls)
                cache_no_acl(inode);
 
-       if (leaf->map_token) {
-               unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-               leaf->map_token = NULL;
-       }
-
        btrfs_free_path(path);
 
        switch (inode->i_mode & S_IFMT) {
@@ -2624,13 +2613,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
                            struct btrfs_inode_item *item,
                            struct inode *inode)
 {
-       if (!leaf->map_token)
-               map_private_extent_buffer(leaf, (unsigned long)item,
-                                         sizeof(struct btrfs_inode_item),
-                                         &leaf->map_token, &leaf->kaddr,
-                                         &leaf->map_start, &leaf->map_len,
-                                         KM_USER1);
-
        btrfs_set_inode_uid(leaf, item, inode->i_uid);
        btrfs_set_inode_gid(leaf, item, inode->i_gid);
        btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
@@ -2659,11 +2641,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
        btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
        btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
        btrfs_set_inode_block_group(leaf, item, 0);
-
-       if (leaf->map_token) {
-               unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-               leaf->map_token = NULL;
-       }
 }
 
 /*
@@ -2684,7 +2661,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
         * The data relocation inode should also be directly updated
         * without delay
         */
-       if (!is_free_space_inode(root, inode)
+       if (!btrfs_is_free_space_inode(root, inode)
            && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
                ret = btrfs_delayed_update_inode(trans, root, inode);
                if (!ret)
@@ -3398,7 +3375,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
 
        ret = -ENOMEM;
 again:
-       page = grab_cache_page(mapping, index);
+       page = find_or_create_page(mapping, index, GFP_NOFS);
        if (!page) {
                btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
                goto out;
@@ -3634,7 +3611,7 @@ void btrfs_evict_inode(struct inode *inode)
 
        truncate_inode_pages(&inode->i_data, 0);
        if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
-                              is_free_space_inode(root, inode)))
+                              btrfs_is_free_space_inode(root, inode)))
                goto no_delete;
 
        if (is_bad_inode(inode)) {
@@ -4271,7 +4248,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
        if (BTRFS_I(inode)->dummy_inode)
                return 0;
 
-       if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode))
+       if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode))
                nolock = true;
 
        if (wbc->sync_mode == WB_SYNC_ALL) {
@@ -4467,7 +4444,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        inode->i_generation = BTRFS_I(inode)->generation;
        btrfs_set_inode_space_info(root, inode);
 
-       if (mode & S_IFDIR)
+       if (S_ISDIR(mode))
                owner = 0;
        else
                owner = 1;
@@ -4512,7 +4489,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 
        btrfs_inherit_iflags(inode, dir);
 
-       if ((mode & S_IFREG)) {
+       if (S_ISREG(mode)) {
                if (btrfs_test_opt(root, NODATASUM))
                        BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
                if (btrfs_test_opt(root, NODATACOW) ||
@@ -6728,8 +6705,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        ei->index_cnt = (u64)-1;
        ei->last_unlink_trans = 0;
 
-       atomic_set(&ei->outstanding_extents, 0);
-       atomic_set(&ei->reserved_extents, 0);
+       spin_lock_init(&ei->lock);
+       ei->outstanding_extents = 0;
+       ei->reserved_extents = 0;
 
        ei->ordered_data_close = 0;
        ei->orphan_meta_reserved = 0;
@@ -6767,8 +6745,8 @@ void btrfs_destroy_inode(struct inode *inode)
 
        WARN_ON(!list_empty(&inode->i_dentry));
        WARN_ON(inode->i_data.nrpages);
-       WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents));
-       WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents));
+       WARN_ON(BTRFS_I(inode)->outstanding_extents);
+       WARN_ON(BTRFS_I(inode)->reserved_extents);
 
        /*
         * This can happen where we create an inode, but somebody else also
@@ -6823,7 +6801,7 @@ int btrfs_drop_inode(struct inode *inode)
        struct btrfs_root *root = BTRFS_I(inode)->root;
 
        if (btrfs_root_refs(&root->root_item) == 0 &&
-           !is_free_space_inode(root, inode))
+           !btrfs_is_free_space_inode(root, inode))
                return 1;
        else
                return generic_drop_inode(inode);
index 6225433..0b980af 100644 (file)
@@ -859,8 +859,8 @@ again:
        /* step one, lock all the pages */
        for (i = 0; i < num_pages; i++) {
                struct page *page;
-               page = grab_cache_page(inode->i_mapping,
-                                           start_index + i);
+               page = find_or_create_page(inode->i_mapping,
+                                           start_index + i, GFP_NOFS);
                if (!page)
                        break;
 
@@ -930,7 +930,9 @@ again:
                          GFP_NOFS);
 
        if (i_done != num_pages) {
-               atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+               spin_lock(&BTRFS_I(inode)->lock);
+               BTRFS_I(inode)->outstanding_extents++;
+               spin_unlock(&BTRFS_I(inode)->lock);
                btrfs_delalloc_release_space(inode,
                                     (num_pages - i_done) << PAGE_CACHE_SHIFT);
        }
index 66fa43d..d77b67c 100644 (file)
 #include "extent_io.h"
 #include "locking.h"
 
-static inline void spin_nested(struct extent_buffer *eb)
-{
-       spin_lock(&eb->lock);
-}
+void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
 
 /*
- * Setting a lock to blocking will drop the spinlock and set the
- * flag that forces other procs who want the lock to wait.  After
- * this you can safely schedule with the lock held.
+ * if we currently have a spinning reader or writer lock
+ * (indicated by the rw flag) this will bump the count
+ * of blocking holders and drop the spinlock.
  */
-void btrfs_set_lock_blocking(struct extent_buffer *eb)
+void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw)
 {
-       if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
-               set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
-               spin_unlock(&eb->lock);
+       if (rw == BTRFS_WRITE_LOCK) {
+               if (atomic_read(&eb->blocking_writers) == 0) {
+                       WARN_ON(atomic_read(&eb->spinning_writers) != 1);
+                       atomic_dec(&eb->spinning_writers);
+                       btrfs_assert_tree_locked(eb);
+                       atomic_inc(&eb->blocking_writers);
+                       write_unlock(&eb->lock);
+               }
+       } else if (rw == BTRFS_READ_LOCK) {
+               btrfs_assert_tree_read_locked(eb);
+               atomic_inc(&eb->blocking_readers);
+               WARN_ON(atomic_read(&eb->spinning_readers) == 0);
+               atomic_dec(&eb->spinning_readers);
+               read_unlock(&eb->lock);
        }
-       /* exit with the spin lock released and the bit set */
+       return;
 }
 
 /*
- * clearing the blocking flag will take the spinlock again.
- * After this you can't safely schedule
+ * if we currently have a blocking lock, take the spinlock
+ * and drop our blocking count
  */
-void btrfs_clear_lock_blocking(struct extent_buffer *eb)
+void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
 {
-       if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
-               spin_nested(eb);
-               clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
-               smp_mb__after_clear_bit();
+       if (rw == BTRFS_WRITE_LOCK_BLOCKING) {
+               BUG_ON(atomic_read(&eb->blocking_writers) != 1);
+               write_lock(&eb->lock);
+               WARN_ON(atomic_read(&eb->spinning_writers));
+               atomic_inc(&eb->spinning_writers);
+               if (atomic_dec_and_test(&eb->blocking_writers))
+                       wake_up(&eb->write_lock_wq);
+       } else if (rw == BTRFS_READ_LOCK_BLOCKING) {
+               BUG_ON(atomic_read(&eb->blocking_readers) == 0);
+               read_lock(&eb->lock);
+               atomic_inc(&eb->spinning_readers);
+               if (atomic_dec_and_test(&eb->blocking_readers))
+                       wake_up(&eb->read_lock_wq);
        }
-       /* exit with the spin lock held */
+       return;
 }
 
 /*
- * unfortunately, many of the places that currently set a lock to blocking
- * don't end up blocking for very long, and often they don't block
- * at all.  For a dbench 50 run, if we don't spin on the blocking bit
- * at all, the context switch rate can jump up to 400,000/sec or more.
- *
- * So, we're still stuck with this crummy spin on the blocking bit,
- * at least until the most common causes of the short blocks
- * can be dealt with.
+ * take a spinning read lock.  This will wait for any blocking
+ * writers
  */
-static int btrfs_spin_on_block(struct extent_buffer *eb)
+void btrfs_tree_read_lock(struct extent_buffer *eb)
 {
-       int i;
-
-       for (i = 0; i < 512; i++) {
-               if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
-                       return 1;
-               if (need_resched())
-                       break;
-               cpu_relax();
+again:
+       wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
+       read_lock(&eb->lock);
+       if (atomic_read(&eb->blocking_writers)) {
+               read_unlock(&eb->lock);
+               wait_event(eb->write_lock_wq,
+                          atomic_read(&eb->blocking_writers) == 0);
+               goto again;
        }
-       return 0;
+       atomic_inc(&eb->read_locks);
+       atomic_inc(&eb->spinning_readers);
 }
 
 /*
- * This is somewhat different from trylock.  It will take the
- * spinlock but if it finds the lock is set to blocking, it will
- * return without the lock held.
- *
- * returns 1 if it was able to take the lock and zero otherwise
- *
- * After this call, scheduling is not safe without first calling
- * btrfs_set_lock_blocking()
+ * returns 1 if we get the read lock and 0 if we don't
+ * this won't wait for blocking writers
  */
-int btrfs_try_spin_lock(struct extent_buffer *eb)
+int btrfs_try_tree_read_lock(struct extent_buffer *eb)
 {
-       int i;
+       if (atomic_read(&eb->blocking_writers))
+               return 0;
 
-       if (btrfs_spin_on_block(eb)) {
-               spin_nested(eb);
-               if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
-                       return 1;
-               spin_unlock(&eb->lock);
+       read_lock(&eb->lock);
+       if (atomic_read(&eb->blocking_writers)) {
+               read_unlock(&eb->lock);
+               return 0;
        }
-       /* spin for a bit on the BLOCKING flag */
-       for (i = 0; i < 2; i++) {
-               cpu_relax();
-               if (!btrfs_spin_on_block(eb))
-                       break;
-
-               spin_nested(eb);
-               if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
-                       return 1;
-               spin_unlock(&eb->lock);
-       }
-       return 0;
+       atomic_inc(&eb->read_locks);
+       atomic_inc(&eb->spinning_readers);
+       return 1;
 }
 
 /*
- * the autoremove wake function will return 0 if it tried to wake up
- * a process that was already awake, which means that process won't
- * count as an exclusive wakeup.  The waitq code will continue waking
- * procs until it finds one that was actually sleeping.
- *
- * For btrfs, this isn't quite what we want.  We want a single proc
- * to be notified that the lock is ready for taking.  If that proc
- * already happen to be awake, great, it will loop around and try for
- * the lock.
- *
- * So, btrfs_wake_function always returns 1, even when the proc that we
- * tried to wake up was already awake.
+ * returns 1 if we get the read lock and 0 if we don't
+ * this won't wait for blocking writers or readers
  */
-static int btrfs_wake_function(wait_queue_t *wait, unsigned mode,
-                              int sync, void *key)
+int btrfs_try_tree_write_lock(struct extent_buffer *eb)
 {
-       autoremove_wake_function(wait, mode, sync, key);
+       if (atomic_read(&eb->blocking_writers) ||
+           atomic_read(&eb->blocking_readers))
+               return 0;
+       write_lock(&eb->lock);
+       if (atomic_read(&eb->blocking_writers) ||
+           atomic_read(&eb->blocking_readers)) {
+               write_unlock(&eb->lock);
+               return 0;
+       }
+       atomic_inc(&eb->write_locks);
+       atomic_inc(&eb->spinning_writers);
        return 1;
 }
 
 /*
- * returns with the extent buffer spinlocked.
- *
- * This will spin and/or wait as required to take the lock, and then
- * return with the spinlock held.
- *
- * After this call, scheduling is not safe without first calling
- * btrfs_set_lock_blocking()
+ * drop a spinning read lock
+ */
+void btrfs_tree_read_unlock(struct extent_buffer *eb)
+{
+       btrfs_assert_tree_read_locked(eb);
+       WARN_ON(atomic_read(&eb->spinning_readers) == 0);
+       atomic_dec(&eb->spinning_readers);
+       atomic_dec(&eb->read_locks);
+       read_unlock(&eb->lock);
+}
+
+/*
+ * drop a blocking read lock
+ */
+void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
+{
+       btrfs_assert_tree_read_locked(eb);
+       WARN_ON(atomic_read(&eb->blocking_readers) == 0);
+       if (atomic_dec_and_test(&eb->blocking_readers))
+               wake_up(&eb->read_lock_wq);
+       atomic_dec(&eb->read_locks);
+}
+
+/*
+ * take a spinning write lock.  This will wait for both
+ * blocking readers or writers
  */
 int btrfs_tree_lock(struct extent_buffer *eb)
 {
-       DEFINE_WAIT(wait);
-       wait.func = btrfs_wake_function;
-
-       if (!btrfs_spin_on_block(eb))
-               goto sleep;
-
-       while(1) {
-               spin_nested(eb);
-
-               /* nobody is blocking, exit with the spinlock held */
-               if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
-                       return 0;
-
-               /*
-                * we have the spinlock, but the real owner is blocking.
-                * wait for them
-                */
-               spin_unlock(&eb->lock);
-
-               /*
-                * spin for a bit, and if the blocking flag goes away,
-                * loop around
-                */
-               cpu_relax();
-               if (btrfs_spin_on_block(eb))
-                       continue;
-sleep:
-               prepare_to_wait_exclusive(&eb->lock_wq, &wait,
-                                         TASK_UNINTERRUPTIBLE);
-
-               if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
-                       schedule();
-
-               finish_wait(&eb->lock_wq, &wait);
+again:
+       wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
+       wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
+       write_lock(&eb->lock);
+       if (atomic_read(&eb->blocking_readers)) {
+               write_unlock(&eb->lock);
+               wait_event(eb->read_lock_wq,
+                          atomic_read(&eb->blocking_readers) == 0);
+               goto again;
        }
+       if (atomic_read(&eb->blocking_writers)) {
+               write_unlock(&eb->lock);
+               wait_event(eb->write_lock_wq,
+                          atomic_read(&eb->blocking_writers) == 0);
+               goto again;
+       }
+       WARN_ON(atomic_read(&eb->spinning_writers));
+       atomic_inc(&eb->spinning_writers);
+       atomic_inc(&eb->write_locks);
        return 0;
 }
 
+/*
+ * drop a spinning or a blocking write lock.
+ */
 int btrfs_tree_unlock(struct extent_buffer *eb)
 {
-       /*
-        * if we were a blocking owner, we don't have the spinlock held
-        * just clear the bit and look for waiters
-        */
-       if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
-               smp_mb__after_clear_bit();
-       else
-               spin_unlock(&eb->lock);
-
-       if (waitqueue_active(&eb->lock_wq))
-               wake_up(&eb->lock_wq);
+       int blockers = atomic_read(&eb->blocking_writers);
+
+       BUG_ON(blockers > 1);
+
+       btrfs_assert_tree_locked(eb);
+       atomic_dec(&eb->write_locks);
+
+       if (blockers) {
+               WARN_ON(atomic_read(&eb->spinning_writers));
+               atomic_dec(&eb->blocking_writers);
+               smp_wmb();
+               wake_up(&eb->write_lock_wq);
+       } else {
+               WARN_ON(atomic_read(&eb->spinning_writers) != 1);
+               atomic_dec(&eb->spinning_writers);
+               write_unlock(&eb->lock);
+       }
        return 0;
 }
 
 void btrfs_assert_tree_locked(struct extent_buffer *eb)
 {
-       if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
-               assert_spin_locked(&eb->lock);
+       BUG_ON(!atomic_read(&eb->write_locks));
+}
+
+void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
+{
+       BUG_ON(!atomic_read(&eb->read_locks));
 }
index 5c33a56..17247dd 100644 (file)
 #ifndef __BTRFS_LOCKING_
 #define __BTRFS_LOCKING_
 
+#define BTRFS_WRITE_LOCK 1
+#define BTRFS_READ_LOCK 2
+#define BTRFS_WRITE_LOCK_BLOCKING 3
+#define BTRFS_READ_LOCK_BLOCKING 4
+
 int btrfs_tree_lock(struct extent_buffer *eb);
 int btrfs_tree_unlock(struct extent_buffer *eb);
 int btrfs_try_spin_lock(struct extent_buffer *eb);
 
-void btrfs_set_lock_blocking(struct extent_buffer *eb);
-void btrfs_clear_lock_blocking(struct extent_buffer *eb);
+void btrfs_tree_read_lock(struct extent_buffer *eb);
+void btrfs_tree_read_unlock(struct extent_buffer *eb);
+void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb);
+void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw);
+void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw);
 void btrfs_assert_tree_locked(struct extent_buffer *eb);
+int btrfs_try_tree_read_lock(struct extent_buffer *eb);
+int btrfs_try_tree_write_lock(struct extent_buffer *eb);
+
+static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
+{
+       if (rw == BTRFS_WRITE_LOCK || rw == BTRFS_WRITE_LOCK_BLOCKING)
+               btrfs_tree_unlock(eb);
+       else if (rw == BTRFS_READ_LOCK_BLOCKING)
+               btrfs_tree_read_unlock_blocking(eb);
+       else if (rw == BTRFS_READ_LOCK)
+               btrfs_tree_read_unlock(eb);
+       else
+               BUG();
+}
+
+static inline void btrfs_set_lock_blocking(struct extent_buffer *eb)
+{
+       btrfs_set_lock_blocking_rw(eb, BTRFS_WRITE_LOCK);
+}
+
+static inline void btrfs_clear_lock_blocking(struct extent_buffer *eb)
+{
+       btrfs_clear_lock_blocking_rw(eb, BTRFS_WRITE_LOCK_BLOCKING);
+}
 #endif
index 5e0a3dc..59bb176 100644 (file)
@@ -2955,7 +2955,8 @@ static int relocate_file_extent_cluster(struct inode *inode,
                        page_cache_sync_readahead(inode->i_mapping,
                                                  ra, NULL, index,
                                                  last_index + 1 - index);
-                       page = grab_cache_page(inode->i_mapping, index);
+                       page = find_or_create_page(inode->i_mapping, index,
+                                                  GFP_NOFS);
                        if (!page) {
                                btrfs_delalloc_release_metadata(inode,
                                                        PAGE_CACHE_SIZE);
index c0f7eca..bc1f6ad 100644 (file)
@@ -50,36 +50,22 @@ u##bits btrfs_##name(struct extent_buffer *eb,                              \
        unsigned long part_offset = (unsigned long)s;                   \
        unsigned long offset = part_offset + offsetof(type, member);    \
        type *p;                                                        \
-       /* ugly, but we want the fast path here */                      \
-       if (eb->map_token && offset >= eb->map_start &&                 \
-           offset + sizeof(((type *)0)->member) <= eb->map_start +     \
-           eb->map_len) {                                              \
-               p = (type *)(eb->kaddr + part_offset - eb->map_start);  \
-               return le##bits##_to_cpu(p->member);                    \
-       }                                                               \
-       {                                                               \
-               int err;                                                \
-               char *map_token;                                        \
-               char *kaddr;                                            \
-               int unmap_on_exit = (eb->map_token == NULL);            \
-               unsigned long map_start;                                \
-               unsigned long map_len;                                  \
-               u##bits res;                                            \
-               err = map_extent_buffer(eb, offset,                     \
-                               sizeof(((type *)0)->member),            \
-                               &map_token, &kaddr,                     \
-                               &map_start, &map_len, KM_USER1);        \
-               if (err) {                                              \
-                       __le##bits leres;                               \
-                       read_eb_member(eb, s, type, member, &leres);    \
-                       return le##bits##_to_cpu(leres);                \
-               }                                                       \
-               p = (type *)(kaddr + part_offset - map_start);          \
-               res = le##bits##_to_cpu(p->member);                     \
-               if (unmap_on_exit)                                      \
-                       unmap_extent_buffer(eb, map_token, KM_USER1);   \
-               return res;                                             \
-       }                                                               \
+       int err;                                                \
+       char *kaddr;                                            \
+       unsigned long map_start;                                \
+       unsigned long map_len;                                  \
+       u##bits res;                                            \
+       err = map_private_extent_buffer(eb, offset,             \
+                       sizeof(((type *)0)->member),            \
+                       &kaddr, &map_start, &map_len);          \
+       if (err) {                                              \
+               __le##bits leres;                               \
+               read_eb_member(eb, s, type, member, &leres);    \
+               return le##bits##_to_cpu(leres);                \
+       }                                                       \
+       p = (type *)(kaddr + part_offset - map_start);          \
+       res = le##bits##_to_cpu(p->member);                     \
+       return res;                                             \
 }                                                                      \
 void btrfs_set_##name(struct extent_buffer *eb,                                \
                                    type *s, u##bits val)               \
@@ -87,36 +73,21 @@ void btrfs_set_##name(struct extent_buffer *eb,                             \
        unsigned long part_offset = (unsigned long)s;                   \
        unsigned long offset = part_offset + offsetof(type, member);    \
        type *p;                                                        \
-       /* ugly, but we want the fast path here */                      \
-       if (eb->map_token && offset >= eb->map_start &&                 \
-           offset + sizeof(((type *)0)->member) <= eb->map_start +     \
-           eb->map_len) {                                              \
-               p = (type *)(eb->kaddr + part_offset - eb->map_start);  \
-               p->member = cpu_to_le##bits(val);                       \
-               return;                                                 \
-       }                                                               \
-       {                                                               \
-               int err;                                                \
-               char *map_token;                                        \
-               char *kaddr;                                            \
-               int unmap_on_exit = (eb->map_token == NULL);            \
-               unsigned long map_start;                                \
-               unsigned long map_len;                                  \
-               err = map_extent_buffer(eb, offset,                     \
-                               sizeof(((type *)0)->member),            \
-                               &map_token, &kaddr,                     \
-                               &map_start, &map_len, KM_USER1);        \
-               if (err) {                                              \
-                       __le##bits val2;                                \
-                       val2 = cpu_to_le##bits(val);                    \
-                       write_eb_member(eb, s, type, member, &val2);    \
-                       return;                                         \
-               }                                                       \
-               p = (type *)(kaddr + part_offset - map_start);          \
-               p->member = cpu_to_le##bits(val);                       \
-               if (unmap_on_exit)                                      \
-                       unmap_extent_buffer(eb, map_token, KM_USER1);   \
-       }                                                               \
+       int err;                                                \
+       char *kaddr;                                            \
+       unsigned long map_start;                                \
+       unsigned long map_len;                                  \
+       err = map_private_extent_buffer(eb, offset,             \
+                       sizeof(((type *)0)->member),            \
+                       &kaddr, &map_start, &map_len);          \
+       if (err) {                                              \
+               __le##bits val2;                                \
+               val2 = cpu_to_le##bits(val);                    \
+               write_eb_member(eb, s, type, member, &val2);    \
+               return;                                         \
+       }                                                       \
+       p = (type *)(kaddr + part_offset - map_start);          \
+       p->member = cpu_to_le##bits(val);                       \
 }
 
 #include "ctree.h"
@@ -125,15 +96,6 @@ void btrfs_node_key(struct extent_buffer *eb,
                    struct btrfs_disk_key *disk_key, int nr)
 {
        unsigned long ptr = btrfs_node_key_ptr_offset(nr);
-       if (eb->map_token && ptr >= eb->map_start &&
-           ptr + sizeof(*disk_key) <= eb->map_start + eb->map_len) {
-               memcpy(disk_key, eb->kaddr + ptr - eb->map_start,
-                       sizeof(*disk_key));
-               return;
-       } else if (eb->map_token) {
-               unmap_extent_buffer(eb, eb->map_token, KM_USER1);
-               eb->map_token = NULL;
-       }
        read_eb_member(eb, (struct btrfs_key_ptr *)ptr,
                       struct btrfs_key_ptr, key, disk_key);
 }
index 51dcec8..eb55863 100644 (file)
@@ -260,7 +260,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
 {
        struct btrfs_trans_handle *h;
        struct btrfs_transaction *cur_trans;
-       int retries = 0;
+       u64 num_bytes = 0;
        int ret;
 
        if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
@@ -274,6 +274,19 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
                h->block_rsv = NULL;
                goto got_it;
        }
+
+       /*
+        * Do the reservation before we join the transaction so we can do all
+        * the appropriate flushing if need be.
+        */
+       if (num_items > 0 && root != root->fs_info->chunk_root) {
+               num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
+               ret = btrfs_block_rsv_add(NULL, root,
+                                         &root->fs_info->trans_block_rsv,
+                                         num_bytes);
+               if (ret)
+                       return ERR_PTR(ret);
+       }
 again:
        h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
        if (!h)
@@ -310,24 +323,9 @@ again:
                goto again;
        }
 
-       if (num_items > 0) {
-               ret = btrfs_trans_reserve_metadata(h, root, num_items);
-               if (ret == -EAGAIN && !retries) {
-                       retries++;
-                       btrfs_commit_transaction(h, root);
-                       goto again;
-               } else if (ret == -EAGAIN) {
-                       /*
-                        * We have already retried and got EAGAIN, so really we
-                        * don't have space, so set ret to -ENOSPC.
-                        */
-                       ret = -ENOSPC;
-               }
-
-               if (ret < 0) {
-                       btrfs_end_transaction(h, root);
-                       return ERR_PTR(ret);
-               }
+       if (num_bytes) {
+               h->block_rsv = &root->fs_info->trans_block_rsv;
+               h->bytes_reserved = num_bytes;
        }
 
 got_it:
@@ -499,10 +497,17 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
        }
 
        if (lock && cur_trans->blocked && !cur_trans->in_commit) {
-               if (throttle)
+               if (throttle) {
+                       /*
+                        * We may race with somebody else here so end up having
+                        * to call end_transaction on ourselves again, so inc
+                        * our use_count.
+                        */
+                       trans->use_count++;
                        return btrfs_commit_transaction(trans, root);
-               else
+               } else {
                        wake_up_process(info->transaction_kthread);
+               }
        }
 
        WARN_ON(cur_trans != info->running_transaction);
index 4ce8a9f..ac278dd 100644 (file)
@@ -1730,8 +1730,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
                                btrfs_read_buffer(next, ptr_gen);
 
                                btrfs_tree_lock(next);
-                               clean_tree_block(trans, root, next);
                                btrfs_set_lock_blocking(next);
+                               clean_tree_block(trans, root, next);
                                btrfs_wait_tree_block_writeback(next);
                                btrfs_tree_unlock(next);
 
@@ -1796,8 +1796,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
                                next = path->nodes[*level];
 
                                btrfs_tree_lock(next);
-                               clean_tree_block(trans, root, next);
                                btrfs_set_lock_blocking(next);
+                               clean_tree_block(trans, root, next);
                                btrfs_wait_tree_block_writeback(next);
                                btrfs_tree_unlock(next);
 
@@ -1864,8 +1864,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
                        next = path->nodes[orig_level];
 
                        btrfs_tree_lock(next);
-                       clean_tree_block(trans, log, next);
                        btrfs_set_lock_blocking(next);
+                       clean_tree_block(trans, log, next);
                        btrfs_wait_tree_block_writeback(next);
                        btrfs_tree_unlock(next);
 
index 19450bc..b89e372 100644 (file)
@@ -3595,7 +3595,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
        if (!sb)
                return -ENOMEM;
        btrfs_set_buffer_uptodate(sb);
-       btrfs_set_buffer_lockdep_class(sb, 0);
+       btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
 
        write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
        array_size = btrfs_super_sys_array_size(super_copy);
index 5366fe4..d733b9c 100644 (file)
@@ -102,43 +102,57 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
        if (!path)
                return -ENOMEM;
 
-       /* first lets see if we already have this xattr */
-       di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
-                               strlen(name), -1);
-       if (IS_ERR(di)) {
-               ret = PTR_ERR(di);
-               goto out;
-       }
-
-       /* ok we already have this xattr, lets remove it */
-       if (di) {
-               /* if we want create only exit */
-               if (flags & XATTR_CREATE) {
-                       ret = -EEXIST;
+       if (flags & XATTR_REPLACE) {
+               di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
+                                       name_len, -1);
+               if (IS_ERR(di)) {
+                       ret = PTR_ERR(di);
+                       goto out;
+               } else if (!di) {
+                       ret = -ENODATA;
                        goto out;
                }
-
                ret = btrfs_delete_one_dir_name(trans, root, path, di);
-               BUG_ON(ret);
+               if (ret)
+                       goto out;
                btrfs_release_path(path);
+       }
 
-               /* if we don't have a value then we are removing the xattr */
-               if (!value)
+again:
+       ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
+                                     name, name_len, value, size);
+       if (ret == -EEXIST) {
+               if (flags & XATTR_CREATE)
                        goto out;
-       } else {
+               /*
+                * We can't use the path we already have since we won't have the
+                * proper locking for a delete, so release the path and
+                * re-lookup to delete the thing.
+                */
                btrfs_release_path(path);
+               di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
+                                       name, name_len, -1);
+               if (IS_ERR(di)) {
+                       ret = PTR_ERR(di);
+                       goto out;
+               } else if (!di) {
+                       /* Shouldn't happen but just in case... */
+                       btrfs_release_path(path);
+                       goto again;
+               }
 
-               if (flags & XATTR_REPLACE) {
-                       /* we couldn't find the attr to replace */
-                       ret = -ENODATA;
+               ret = btrfs_delete_one_dir_name(trans, root, path, di);
+               if (ret)
                        goto out;
+
+               /*
+                * We have a value to set, so go back and try to insert it now.
+                */
+               if (value) {
+                       btrfs_release_path(path);
+                       goto again;
                }
        }
-
-       /* ok we have to create a completely new xattr */
-       ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
-                                     name, name_len, value, size);
-       BUG_ON(ret);
 out:
        btrfs_free_path(path);
        return ret;
index be18598..b05aac3 100644 (file)
@@ -2138,8 +2138,9 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry,
  * @target: new dentry
  *
  * Update the dcache to reflect the move of a file name. Negative
- * dcache entries should not be moved in this way.  Caller hold
- * rename_lock.
+ * dcache entries should not be moved in this way. Caller must hold
+ * rename_lock, the i_mutex of the source and target directories,
+ * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
  */
 static void __d_move(struct dentry * dentry, struct dentry * target)
 {
@@ -2202,7 +2203,8 @@ static void __d_move(struct dentry * dentry, struct dentry * target)
  * @target: new dentry
  *
  * Update the dcache to reflect the move of a file name. Negative
- * dcache entries should not be moved in this way.
+ * dcache entries should not be moved in this way. See the locking
+ * requirements for __d_move.
  */
 void d_move(struct dentry *dentry, struct dentry *target)
 {
@@ -2320,7 +2322,8 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
  * @inode: inode to bind to the dentry, to which aliases may be attached
  *
  * Introduces an dentry into the tree, substituting an extant disconnected
- * root directory alias in its place if there is one
+ * root directory alias in its place if there is one. Caller must hold the
+ * i_mutex of the parent directory.
  */
 struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
 {
index 43c7c43..b36c557 100644 (file)
@@ -29,6 +29,7 @@
 #define ECRYPTFS_KERNEL_H
 
 #include <keys/user-type.h>
+#include <keys/encrypted-type.h>
 #include <linux/fs.h>
 #include <linux/fs_stack.h>
 #include <linux/namei.h>
 #include <linux/hash.h>
 #include <linux/nsproxy.h>
 #include <linux/backing-dev.h>
+#include <linux/ecryptfs.h>
 
-/* Version verification for shared data structures w/ userspace */
-#define ECRYPTFS_VERSION_MAJOR 0x00
-#define ECRYPTFS_VERSION_MINOR 0x04
-#define ECRYPTFS_SUPPORTED_FILE_VERSION 0x03
-/* These flags indicate which features are supported by the kernel
- * module; userspace tools such as the mount helper read
- * ECRYPTFS_VERSIONING_MASK from a sysfs handle in order to determine
- * how to behave. */
-#define ECRYPTFS_VERSIONING_PASSPHRASE            0x00000001
-#define ECRYPTFS_VERSIONING_PUBKEY                0x00000002
-#define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH 0x00000004
-#define ECRYPTFS_VERSIONING_POLICY                0x00000008
-#define ECRYPTFS_VERSIONING_XATTR                 0x00000010
-#define ECRYPTFS_VERSIONING_MULTKEY               0x00000020
-#define ECRYPTFS_VERSIONING_DEVMISC               0x00000040
-#define ECRYPTFS_VERSIONING_HMAC                  0x00000080
-#define ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION   0x00000100
-#define ECRYPTFS_VERSIONING_GCM                   0x00000200
-#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \
-                                 | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \
-                                 | ECRYPTFS_VERSIONING_PUBKEY \
-                                 | ECRYPTFS_VERSIONING_XATTR \
-                                 | ECRYPTFS_VERSIONING_MULTKEY \
-                                 | ECRYPTFS_VERSIONING_DEVMISC \
-                                 | ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION)
-#define ECRYPTFS_MAX_PASSWORD_LENGTH 64
-#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH
-#define ECRYPTFS_SALT_SIZE 8
-#define ECRYPTFS_SALT_SIZE_HEX (ECRYPTFS_SALT_SIZE*2)
-/* The original signature size is only for what is stored on disk; all
- * in-memory representations are expanded hex, so it better adapted to
- * be passed around or referenced on the command line */
-#define ECRYPTFS_SIG_SIZE 8
-#define ECRYPTFS_SIG_SIZE_HEX (ECRYPTFS_SIG_SIZE*2)
-#define ECRYPTFS_PASSWORD_SIG_SIZE ECRYPTFS_SIG_SIZE_HEX
-#define ECRYPTFS_MAX_KEY_BYTES 64
-#define ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES 512
 #define ECRYPTFS_DEFAULT_IV_BYTES 16
-#define ECRYPTFS_FILE_VERSION 0x03
 #define ECRYPTFS_DEFAULT_EXTENT_SIZE 4096
 #define ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE 8192
 #define ECRYPTFS_DEFAULT_MSG_CTX_ELEMS 32
 #define ECRYPTFS_DEFAULT_SEND_TIMEOUT HZ
 #define ECRYPTFS_MAX_MSG_CTX_TTL (HZ*3)
-#define ECRYPTFS_MAX_PKI_NAME_BYTES 16
 #define ECRYPTFS_DEFAULT_NUM_USERS 4
 #define ECRYPTFS_MAX_NUM_USERS 32768
 #define ECRYPTFS_XATTR_NAME "user.ecryptfs"
 
-#define RFC2440_CIPHER_DES3_EDE 0x02
-#define RFC2440_CIPHER_CAST_5 0x03
-#define RFC2440_CIPHER_BLOWFISH 0x04
-#define RFC2440_CIPHER_AES_128 0x07
-#define RFC2440_CIPHER_AES_192 0x08
-#define RFC2440_CIPHER_AES_256 0x09
-#define RFC2440_CIPHER_TWOFISH 0x0a
-#define RFC2440_CIPHER_CAST_6 0x0b
-
-#define RFC2440_CIPHER_RSA 0x01
-
-/**
- * For convenience, we may need to pass around the encrypted session
- * key between kernel and userspace because the authentication token
- * may not be extractable.  For example, the TPM may not release the
- * private key, instead requiring the encrypted data and returning the
- * decrypted data.
- */
-struct ecryptfs_session_key {
-#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT 0x00000001
-#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT 0x00000002
-#define ECRYPTFS_CONTAINS_DECRYPTED_KEY 0x00000004
-#define ECRYPTFS_CONTAINS_ENCRYPTED_KEY 0x00000008
-       u32 flags;
-       u32 encrypted_key_size;
-       u32 decrypted_key_size;
-       u8 encrypted_key[ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES];
-       u8 decrypted_key[ECRYPTFS_MAX_KEY_BYTES];
-};
-
-struct ecryptfs_password {
-       u32 password_bytes;
-       s32 hash_algo;
-       u32 hash_iterations;
-       u32 session_key_encryption_key_bytes;
-#define ECRYPTFS_PERSISTENT_PASSWORD 0x01
-#define ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET 0x02
-       u32 flags;
-       /* Iterated-hash concatenation of salt and passphrase */
-       u8 session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES];
-       u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1];
-       /* Always in expanded hex */
-       u8 salt[ECRYPTFS_SALT_SIZE];
-};
-
-enum ecryptfs_token_types {ECRYPTFS_PASSWORD, ECRYPTFS_PRIVATE_KEY};
-
-struct ecryptfs_private_key {
-       u32 key_size;
-       u32 data_len;
-       u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1];
-       char pki_type[ECRYPTFS_MAX_PKI_NAME_BYTES + 1];
-       u8 data[];
-};
-
-/* May be a password or a private key */
-struct ecryptfs_auth_tok {
-       u16 version; /* 8-bit major and 8-bit minor */
-       u16 token_type;
-#define ECRYPTFS_ENCRYPT_ONLY 0x00000001
-       u32 flags;
-       struct ecryptfs_session_key session_key;
-       u8 reserved[32];
-       union {
-               struct ecryptfs_password password;
-               struct ecryptfs_private_key private_key;
-       } token;
-} __attribute__ ((packed));
-
 void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok);
 extern void ecryptfs_to_hex(char *dst, char *src, size_t src_size);
 extern void ecryptfs_from_hex(char *dst, char *src, int dst_size);
@@ -185,11 +79,47 @@ struct ecryptfs_page_crypt_context {
        } param;
 };
 
+#if defined(CONFIG_ENCRYPTED_KEYS) || defined(CONFIG_ENCRYPTED_KEYS_MODULE)
+static inline struct ecryptfs_auth_tok *
+ecryptfs_get_encrypted_key_payload_data(struct key *key)
+{
+       if (key->type == &key_type_encrypted)
+               return (struct ecryptfs_auth_tok *)
+                       (&((struct encrypted_key_payload *)key->payload.data)->payload_data);
+       else
+               return NULL;
+}
+
+static inline struct key *ecryptfs_get_encrypted_key(char *sig)
+{
+       return request_key(&key_type_encrypted, sig, NULL);
+}
+
+#else
+static inline struct ecryptfs_auth_tok *
+ecryptfs_get_encrypted_key_payload_data(struct key *key)
+{
+       return NULL;
+}
+
+static inline struct key *ecryptfs_get_encrypted_key(char *sig)
+{
+       return ERR_PTR(-ENOKEY);
+}
+
+#endif /* CONFIG_ENCRYPTED_KEYS */
+
 static inline struct ecryptfs_auth_tok *
 ecryptfs_get_key_payload_data(struct key *key)
 {
-       return (struct ecryptfs_auth_tok *)
-               (((struct user_key_payload*)key->payload.data)->data);
+       struct ecryptfs_auth_tok *auth_tok;
+
+       auth_tok = ecryptfs_get_encrypted_key_payload_data(key);
+       if (!auth_tok)
+               return (struct ecryptfs_auth_tok *)
+                       (((struct user_key_payload *)key->payload.data)->data);
+       else
+               return auth_tok;
 }
 
 #define ECRYPTFS_MAX_KEYSET_SIZE 1024
index fa8049e..c472533 100644 (file)
@@ -1635,11 +1635,14 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
 
        (*auth_tok_key) = request_key(&key_type_user, sig, NULL);
        if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) {
-               printk(KERN_ERR "Could not find key with description: [%s]\n",
-                      sig);
-               rc = process_request_key_err(PTR_ERR(*auth_tok_key));
-               (*auth_tok_key) = NULL;
-               goto out;
+               (*auth_tok_key) = ecryptfs_get_encrypted_key(sig);
+               if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) {
+                       printk(KERN_ERR "Could not find key with description: [%s]\n",
+                             sig);
+                       rc = process_request_key_err(PTR_ERR(*auth_tok_key));
+                       (*auth_tok_key) = NULL;
+                       goto out;
+               }
        }
        down_write(&(*auth_tok_key)->sem);
        rc = ecryptfs_verify_auth_tok_from_key(*auth_tok_key, auth_tok);
index 5c0a6a4..503bfb0 100644 (file)
@@ -61,7 +61,6 @@ extern int ext2_init_acl (struct inode *, struct inode *);
 #else
 #include <linux/sched.h>
 #define ext2_get_acl   NULL
-#define ext2_get_acl   NULL
 #define ext2_set_acl   NULL
 
 static inline int
index 516516e..3bc073a 100644 (file)
@@ -1018,13 +1018,13 @@ hostdata_error:
                fsname++;
        if (lm->lm_mount == NULL) {
                fs_info(sdp, "Now mounting FS...\n");
-               complete(&sdp->sd_locking_init);
+               complete_all(&sdp->sd_locking_init);
                return 0;
        }
        ret = lm->lm_mount(sdp, fsname);
        if (ret == 0)
                fs_info(sdp, "Joined cluster. Now mounting FS...\n");
-       complete(&sdp->sd_locking_init);
+       complete_all(&sdp->sd_locking_init);
        return ret;
 }
 
index a48fa53..d0c72ff 100644 (file)
@@ -361,9 +361,11 @@ EXPORT_SYMBOL_GPL(inode_sb_list_add);
 
 static inline void inode_sb_list_del(struct inode *inode)
 {
-       spin_lock(&inode_sb_list_lock);
-       list_del_init(&inode->i_sb_list);
-       spin_unlock(&inode_sb_list_lock);
+       if (!list_empty(&inode->i_sb_list)) {
+               spin_lock(&inode_sb_list_lock);
+               list_del_init(&inode->i_sb_list);
+               spin_unlock(&inode_sb_list_lock);
+       }
 }
 
 static unsigned long hash(struct super_block *sb, unsigned long hashval)
@@ -796,6 +798,29 @@ unsigned int get_next_ino(void)
 EXPORT_SYMBOL(get_next_ino);
 
 /**
+ *     new_inode_pseudo        - obtain an inode
+ *     @sb: superblock
+ *
+ *     Allocates a new inode for given superblock.
+ *     Inode wont be chained in superblock s_inodes list
+ *     This means :
+ *     - fs can't be unmount
+ *     - quotas, fsnotify, writeback can't work
+ */
+struct inode *new_inode_pseudo(struct super_block *sb)
+{
+       struct inode *inode = alloc_inode(sb);
+
+       if (inode) {
+               spin_lock(&inode->i_lock);
+               inode->i_state = 0;
+               spin_unlock(&inode->i_lock);
+               INIT_LIST_HEAD(&inode->i_sb_list);
+       }
+       return inode;
+}
+
+/**
  *     new_inode       - obtain an inode
  *     @sb: superblock
  *
@@ -813,13 +838,9 @@ struct inode *new_inode(struct super_block *sb)
 
        spin_lock_prefetch(&inode_sb_list_lock);
 
-       inode = alloc_inode(sb);
-       if (inode) {
-               spin_lock(&inode->i_lock);
-               inode->i_state = 0;
-               spin_unlock(&inode->i_lock);
+       inode = new_inode_pseudo(sb);
+       if (inode)
                inode_sb_list_add(inode);
-       }
        return inode;
 }
 EXPORT_SYMBOL(new_inode);
index eeead33..b81b35d 100644 (file)
@@ -80,7 +80,7 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
                                  ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
        if (ret) {
                jffs2_free_raw_inode(ri);
-               if (S_ISLNK(inode->i_mode & S_IFMT))
+               if (S_ISLNK(inode->i_mode))
                         kfree(mdata);
                return ret;
        }
index 4496872..9cbd11a 100644 (file)
@@ -3161,7 +3161,7 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
 {
        int rc;
        int dbitno, word, rembits, nb, nwords, wbitno, agno;
-       s8 oldroot, *leaf;
+       s8 oldroot;
        struct dmaptree *tp = (struct dmaptree *) & dp->tree;
 
        /* save the current value of the root (i.e. maximum free string)
@@ -3169,9 +3169,6 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
         */
        oldroot = tp->stree[ROOT];
 
-       /* pick up a pointer to the leaves of the dmap tree */
-       leaf = tp->stree + LEAFIND;
-
        /* determine the bit number and word within the dmap of the
         * starting block.
         */
index f6cc0c0..af96060 100644 (file)
@@ -1143,7 +1143,6 @@ int txCommit(tid_t tid,           /* transaction identifier */
        struct jfs_log *log;
        struct tblock *tblk;
        struct lrd *lrd;
-       int lsn;
        struct inode *ip;
        struct jfs_inode_info *jfs_ip;
        int k, n;
@@ -1310,7 +1309,7 @@ int txCommit(tid_t tid,           /* transaction identifier */
         */
        lrd->type = cpu_to_le16(LOG_COMMIT);
        lrd->length = 0;
-       lsn = lmLog(log, tblk, lrd, NULL);
+       lmLog(log, tblk, lrd, NULL);
 
        lmGroupCommit(log, tblk);
 
@@ -2935,7 +2934,6 @@ int jfs_sync(void *arg)
 {
        struct inode *ip;
        struct jfs_inode_info *jfs_ip;
-       int rc;
        tid_t tid;
 
        do {
@@ -2961,7 +2959,7 @@ int jfs_sync(void *arg)
                                 */
                                TXN_UNLOCK();
                                tid = txBegin(ip->i_sb, COMMIT_INODE);
-                               rc = txCommit(tid, 1, &ip, 0);
+                               txCommit(tid, 1, &ip, 0);
                                txEnd(tid);
                                mutex_unlock(&jfs_ip->commit_mutex);
 
index 29b1f1a..e17545e 100644 (file)
@@ -893,7 +893,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
        unchar *i_fastsymlink;
        s64 xlen = 0;
        int bmask = 0, xsize;
-       s64 extent = 0, xaddr;
+       s64 xaddr;
        struct metapage *mp;
        struct super_block *sb;
        struct tblock *tblk;
@@ -993,7 +993,6 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
                        txAbort(tid, 0);
                        goto out3;
                }
-               extent = xaddr;
                ip->i_size = ssize - 1;
                while (ssize) {
                        /* This is kind of silly since PATH_MAX == 4K */
index e374050..8392cb8 100644 (file)
@@ -302,7 +302,8 @@ nlmclnt_call(struct rpc_cred *cred, struct nlm_rqst *req, u32 proc)
                                /* We appear to be out of the grace period */
                                wake_up_all(&host->h_gracewait);
                        }
-                       dprintk("lockd: server returns status %d\n", resp->status);
+                       dprintk("lockd: server returns status %d\n",
+                               ntohl(resp->status));
                        return 0;       /* Okay, call complete */
                }
 
@@ -690,7 +691,8 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
                goto out;
 
        if (resp->status != nlm_lck_denied_nolocks)
-               printk("lockd: unexpected unlock status: %d\n", resp->status);
+               printk("lockd: unexpected unlock status: %d\n",
+                       ntohl(resp->status));
        /* What to do now? I'm out of my depth... */
        status = -ENOLCK;
 out:
@@ -843,6 +845,7 @@ nlm_stat_to_errno(__be32 status)
                return -ENOLCK;
 #endif
        }
-       printk(KERN_NOTICE "lockd: unexpected server status %d\n", status);
+       printk(KERN_NOTICE "lockd: unexpected server status %d\n",
+                ntohl(status));
        return -ENOLCK;
 }
index 8151554..2cde5d9 100644 (file)
@@ -77,6 +77,7 @@ config NFS_V4
 config NFS_V4_1
        bool "NFS client support for NFSv4.1 (EXPERIMENTAL)"
        depends on NFS_FS && NFS_V4 && EXPERIMENTAL
+       select SUNRPC_BACKCHANNEL
        select PNFS_FILE_LAYOUT
        help
          This option enables support for minor version 1 of the NFSv4 protocol
index d4d1954..74780f9 100644 (file)
@@ -111,6 +111,7 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf
 static u32 initiate_file_draining(struct nfs_client *clp,
                                  struct cb_layoutrecallargs *args)
 {
+       struct nfs_server *server;
        struct pnfs_layout_hdr *lo;
        struct inode *ino;
        bool found = false;
@@ -118,21 +119,28 @@ static u32 initiate_file_draining(struct nfs_client *clp,
        LIST_HEAD(free_me_list);
 
        spin_lock(&clp->cl_lock);
-       list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
-               if (nfs_compare_fh(&args->cbl_fh,
-                                  &NFS_I(lo->plh_inode)->fh))
-                       continue;
-               ino = igrab(lo->plh_inode);
-               if (!ino)
-                       continue;
-               found = true;
-               /* Without this, layout can be freed as soon
-                * as we release cl_lock.
-                */
-               get_layout_hdr(lo);
-               break;
+       rcu_read_lock();
+       list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+               list_for_each_entry(lo, &server->layouts, plh_layouts) {
+                       if (nfs_compare_fh(&args->cbl_fh,
+                                          &NFS_I(lo->plh_inode)->fh))
+                               continue;
+                       ino = igrab(lo->plh_inode);
+                       if (!ino)
+                               continue;
+                       found = true;
+                       /* Without this, layout can be freed as soon
+                        * as we release cl_lock.
+                        */
+                       get_layout_hdr(lo);
+                       break;
+               }
+               if (found)
+                       break;
        }
+       rcu_read_unlock();
        spin_unlock(&clp->cl_lock);
+
        if (!found)
                return NFS4ERR_NOMATCHING_LAYOUT;
 
@@ -154,6 +162,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
 static u32 initiate_bulk_draining(struct nfs_client *clp,
                                  struct cb_layoutrecallargs *args)
 {
+       struct nfs_server *server;
        struct pnfs_layout_hdr *lo;
        struct inode *ino;
        u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
@@ -167,18 +176,24 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
        };
 
        spin_lock(&clp->cl_lock);
-       list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
+       rcu_read_lock();
+       list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
                if ((args->cbl_recall_type == RETURN_FSID) &&
-                   memcmp(&NFS_SERVER(lo->plh_inode)->fsid,
-                          &args->cbl_fsid, sizeof(struct nfs_fsid)))
-                       continue;
-               if (!igrab(lo->plh_inode))
+                   memcmp(&server->fsid, &args->cbl_fsid,
+                          sizeof(struct nfs_fsid)))
                        continue;
-               get_layout_hdr(lo);
-               BUG_ON(!list_empty(&lo->plh_bulk_recall));
-               list_add(&lo->plh_bulk_recall, &recall_list);
+
+               list_for_each_entry(lo, &server->layouts, plh_layouts) {
+                       if (!igrab(lo->plh_inode))
+                               continue;
+                       get_layout_hdr(lo);
+                       BUG_ON(!list_empty(&lo->plh_bulk_recall));
+                       list_add(&lo->plh_bulk_recall, &recall_list);
+               }
        }
+       rcu_read_unlock();
        spin_unlock(&clp->cl_lock);
+
        list_for_each_entry_safe(lo, tmp,
                                 &recall_list, plh_bulk_recall) {
                ino = lo->plh_inode;
index b3dc2b8..19ea7d9 100644 (file)
@@ -188,9 +188,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
        cred = rpc_lookup_machine_cred();
        if (!IS_ERR(cred))
                clp->cl_machine_cred = cred;
-#if defined(CONFIG_NFS_V4_1)
-       INIT_LIST_HEAD(&clp->cl_layouts);
-#endif
        nfs_fscache_get_client_cookie(clp);
 
        return clp;
@@ -293,6 +290,7 @@ static void nfs_free_client(struct nfs_client *clp)
        nfs4_deviceid_purge_client(clp);
 
        kfree(clp->cl_hostname);
+       kfree(clp->server_scope);
        kfree(clp);
 
        dprintk("<-- nfs_free_client()\n");
@@ -1062,6 +1060,7 @@ static struct nfs_server *nfs_alloc_server(void)
        INIT_LIST_HEAD(&server->client_link);
        INIT_LIST_HEAD(&server->master_link);
        INIT_LIST_HEAD(&server->delegations);
+       INIT_LIST_HEAD(&server->layouts);
 
        atomic_set(&server->active, 0);
 
@@ -1464,7 +1463,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
        dprintk("<-- %s %p\n", __func__, clp);
        return clp;
 }
-EXPORT_SYMBOL(nfs4_set_ds_client);
+EXPORT_SYMBOL_GPL(nfs4_set_ds_client);
 
 /*
  * Session has been established, and the client marked ready.
index dd25c2a..321a66b 100644 (file)
@@ -398,12 +398,11 @@ int nfs_inode_return_delegation(struct inode *inode)
        return err;
 }
 
-static void nfs_mark_return_delegation(struct nfs_delegation *delegation)
+static void nfs_mark_return_delegation(struct nfs_server *server,
+               struct nfs_delegation *delegation)
 {
-       struct nfs_client *clp = NFS_SERVER(delegation->inode)->nfs_client;
-
        set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
-       set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
+       set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
 }
 
 /**
@@ -441,7 +440,7 @@ static void nfs_mark_return_all_delegation_types(struct nfs_server *server,
                if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE))
                        continue;
                if (delegation->type & flags)
-                       nfs_mark_return_delegation(delegation);
+                       nfs_mark_return_delegation(server, delegation);
        }
 }
 
@@ -508,7 +507,7 @@ static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server)
        list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
                if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags))
                        continue;
-               nfs_mark_return_delegation(delegation);
+               nfs_mark_return_delegation(server, delegation);
        }
 }
 
@@ -539,7 +538,8 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
 int nfs_async_inode_return_delegation(struct inode *inode,
                                      const nfs4_stateid *stateid)
 {
-       struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+       struct nfs_server *server = NFS_SERVER(inode);
+       struct nfs_client *clp = server->nfs_client;
        struct nfs_delegation *delegation;
 
        rcu_read_lock();
@@ -549,7 +549,7 @@ int nfs_async_inode_return_delegation(struct inode *inode,
                rcu_read_unlock();
                return -ENOENT;
        }
-       nfs_mark_return_delegation(delegation);
+       nfs_mark_return_delegation(server, delegation);
        rcu_read_unlock();
 
        nfs_delegation_run_state_manager(clp);
index 2a55347..ab12913 100644 (file)
@@ -277,6 +277,9 @@ extern void nfs_sb_deactive(struct super_block *sb);
 extern char *nfs_path(char **p, struct dentry *dentry,
                      char *buffer, ssize_t buflen);
 extern struct vfsmount *nfs_d_automount(struct path *path);
+#ifdef CONFIG_NFS_V4
+rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
+#endif
 
 /* getroot.c */
 extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
@@ -288,12 +291,22 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
 extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
 #endif
 
+struct nfs_pageio_descriptor;
 /* read.c */
 extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
                             const struct rpc_call_ops *call_ops);
 extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
+extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
+               struct list_head *head);
+
+extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
+extern void nfs_readdata_release(struct nfs_read_data *rdata);
 
 /* write.c */
+extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
+               struct list_head *head);
+extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
+extern void nfs_writedata_release(struct nfs_write_data *wdata);
 extern void nfs_commit_free(struct nfs_write_data *p);
 extern int nfs_initiate_write(struct nfs_write_data *data,
                              struct rpc_clnt *clnt,
index 1f063ba..8102391 100644 (file)
@@ -119,7 +119,7 @@ Elong:
 }
 
 #ifdef CONFIG_NFS_V4
-static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
+rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
 {
        struct gss_api_mech *mech;
        struct xdr_netobj oid;
index b788f2e..1909ee8 100644 (file)
@@ -48,6 +48,7 @@ enum nfs4_client_state {
        NFS4CLNT_SESSION_RESET,
        NFS4CLNT_RECALL_SLOT,
        NFS4CLNT_LEASE_CONFIRM,
+       NFS4CLNT_SERVER_SCOPE_MISMATCH,
 };
 
 enum nfs4_session_state {
@@ -66,6 +67,8 @@ struct nfs4_minor_version_ops {
                        int cache_reply);
        int     (*validate_stateid)(struct nfs_delegation *,
                        const nfs4_stateid *);
+       int     (*find_root_sec)(struct nfs_server *, struct nfs_fh *,
+                       struct nfs_fsinfo *);
        const struct nfs4_state_recovery_ops *reboot_recovery_ops;
        const struct nfs4_state_recovery_ops *nograce_recovery_ops;
        const struct nfs4_state_maintenance_ops *state_renewal_ops;
@@ -349,6 +352,8 @@ extern void nfs4_schedule_state_manager(struct nfs_client *);
 extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
 extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
 extern void nfs41_handle_recall_slot(struct nfs_client *clp);
+extern void nfs41_handle_server_scope(struct nfs_client *,
+                                     struct server_scope **);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
 extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
 extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t);
index f9d03ab..be93a62 100644 (file)
@@ -334,6 +334,9 @@ filelayout_read_pagelist(struct nfs_read_data *data)
                __func__, data->inode->i_ino,
                data->args.pgbase, (size_t)data->args.count, offset);
 
+       if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
+               return PNFS_NOT_ATTEMPTED;
+
        /* Retrieve the correct rpc_client for the byte range */
        j = nfs4_fl_calc_j_index(lseg, offset);
        idx = nfs4_fl_calc_ds_index(lseg, j);
@@ -344,8 +347,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
                set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
                return PNFS_NOT_ATTEMPTED;
        }
-       dprintk("%s USE DS:ip %x %hu\n", __func__,
-               ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+       dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr);
 
        /* No multipath support. Use first DS */
        data->ds_clp = ds->ds_clp;
@@ -374,6 +376,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
        struct nfs_fh *fh;
        int status;
 
+       if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
+               return PNFS_NOT_ATTEMPTED;
+
        /* Retrieve the correct rpc_client for the byte range */
        j = nfs4_fl_calc_j_index(lseg, offset);
        idx = nfs4_fl_calc_ds_index(lseg, j);
@@ -384,9 +389,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
                set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
                return PNFS_NOT_ATTEMPTED;
        }
-       dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__,
+       dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__,
                data->inode->i_ino, sync, (size_t) data->args.count, offset,
-               ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+               ds->ds_remotestr);
 
        data->write_done_cb = filelayout_write_done_cb;
        data->ds_clp = ds->ds_clp;
@@ -428,6 +433,14 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
 
        dprintk("--> %s\n", __func__);
 
+       /* FIXME: remove this check when layout segment support is added */
+       if (lgr->range.offset != 0 ||
+           lgr->range.length != NFS4_MAX_UINT64) {
+               dprintk("%s Only whole file layouts supported. Use MDS i/o\n",
+                       __func__);
+               goto out;
+       }
+
        if (fl->pattern_offset > lgr->range.offset) {
                dprintk("%s pattern_offset %lld too large\n",
                                __func__, fl->pattern_offset);
@@ -449,6 +462,10 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
                        goto out;
        } else
                dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
+       /* Found deviceid is being reaped */
+       if (test_bit(NFS_DEVICEID_INVALID, &dsaddr->id_node.flags))
+                       goto out_put;
+
        fl->dsaddr = dsaddr;
 
        if (fl->first_stripe_index < 0 ||
@@ -659,7 +676,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
  * return true  : coalesce page
  * return false : don't coalesce page
  */
-bool
+static bool
 filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
                   struct nfs_page *req)
 {
@@ -670,8 +687,6 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
            !nfs_generic_pg_test(pgio, prev, req))
                return false;
 
-       if (!pgio->pg_lseg)
-               return 1;
        p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
        r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
        stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
@@ -682,6 +697,52 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
        return (p_stripe == r_stripe);
 }
 
+void
+filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
+                       struct nfs_page *req)
+{
+       BUG_ON(pgio->pg_lseg != NULL);
+
+       pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+                                          req->wb_context,
+                                          0,
+                                          NFS4_MAX_UINT64,
+                                          IOMODE_READ,
+                                          GFP_KERNEL);
+       /* If no lseg, fall back to read through mds */
+       if (pgio->pg_lseg == NULL)
+               nfs_pageio_reset_read_mds(pgio);
+}
+
+void
+filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
+                        struct nfs_page *req)
+{
+       BUG_ON(pgio->pg_lseg != NULL);
+
+       pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+                                          req->wb_context,
+                                          0,
+                                          NFS4_MAX_UINT64,
+                                          IOMODE_RW,
+                                          GFP_NOFS);
+       /* If no lseg, fall back to write through mds */
+       if (pgio->pg_lseg == NULL)
+               nfs_pageio_reset_write_mds(pgio);
+}
+
+static const struct nfs_pageio_ops filelayout_pg_read_ops = {
+       .pg_init = filelayout_pg_init_read,
+       .pg_test = filelayout_pg_test,
+       .pg_doio = pnfs_generic_pg_readpages,
+};
+
+static const struct nfs_pageio_ops filelayout_pg_write_ops = {
+       .pg_init = filelayout_pg_init_write,
+       .pg_test = filelayout_pg_test,
+       .pg_doio = pnfs_generic_pg_writepages,
+};
+
 static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
 {
        return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
@@ -879,7 +940,8 @@ static struct pnfs_layoutdriver_type filelayout_type = {
        .owner                  = THIS_MODULE,
        .alloc_lseg             = filelayout_alloc_lseg,
        .free_lseg              = filelayout_free_lseg,
-       .pg_test                = filelayout_pg_test,
+       .pg_read_ops            = &filelayout_pg_read_ops,
+       .pg_write_ops           = &filelayout_pg_write_ops,
        .mark_pnfs_commit       = filelayout_mark_pnfs_commit,
        .choose_commit_list     = filelayout_choose_commit_list,
        .commit_pagelist        = filelayout_commit_pagelist,
@@ -902,5 +964,7 @@ static void __exit nfs4filelayout_exit(void)
        pnfs_unregister_layoutdriver(&filelayout_type);
 }
 
+MODULE_ALIAS("nfs-layouttype4-1");
+
 module_init(nfs4filelayout_init);
 module_exit(nfs4filelayout_exit);
index cebe01e..2e42284 100644 (file)
@@ -47,10 +47,17 @@ enum stripetype4 {
 };
 
 /* Individual ip address */
+struct nfs4_pnfs_ds_addr {
+       struct sockaddr_storage da_addr;
+       size_t                  da_addrlen;
+       struct list_head        da_node;  /* nfs4_pnfs_dev_hlist dev_dslist */
+       char                    *da_remotestr;  /* human readable addr+port */
+};
+
 struct nfs4_pnfs_ds {
        struct list_head        ds_node;  /* nfs4_pnfs_dev_hlist dev_dslist */
-       u32                     ds_ip_addr;
-       u32                     ds_port;
+       char                    *ds_remotestr;  /* comma sep list of addrs */
+       struct list_head        ds_addrs;
        struct nfs_client       *ds_clp;
        atomic_t                ds_count;
 };
@@ -89,6 +96,12 @@ FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
                            generic_hdr);
 }
 
+static inline struct nfs4_deviceid_node *
+FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg)
+{
+       return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node;
+}
+
 extern struct nfs_fh *
 nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
 
index 3b7bf13..ed388aa 100644 (file)
@@ -56,54 +56,139 @@ print_ds(struct nfs4_pnfs_ds *ds)
                printk("%s NULL device\n", __func__);
                return;
        }
-       printk("        ip_addr %x port %hu\n"
+       printk("        ds %s\n"
                "        ref count %d\n"
                "        client %p\n"
                "        cl_exchange_flags %x\n",
-               ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
+               ds->ds_remotestr,
                atomic_read(&ds->ds_count), ds->ds_clp,
                ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
 }
 
-/* nfs4_ds_cache_lock is held */
-static struct nfs4_pnfs_ds *
-_data_server_lookup_locked(u32 ip_addr, u32 port)
+static bool
+same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
 {
-       struct nfs4_pnfs_ds *ds;
+       struct sockaddr_in *a, *b;
+       struct sockaddr_in6 *a6, *b6;
+
+       if (addr1->sa_family != addr2->sa_family)
+               return false;
+
+       switch (addr1->sa_family) {
+       case AF_INET:
+               a = (struct sockaddr_in *)addr1;
+               b = (struct sockaddr_in *)addr2;
+
+               if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
+                   a->sin_port == b->sin_port)
+                       return true;
+               break;
+
+       case AF_INET6:
+               a6 = (struct sockaddr_in6 *)addr1;
+               b6 = (struct sockaddr_in6 *)addr2;
+
+               /* LINKLOCAL addresses must have matching scope_id */
+               if (ipv6_addr_scope(&a6->sin6_addr) ==
+                   IPV6_ADDR_SCOPE_LINKLOCAL &&
+                   a6->sin6_scope_id != b6->sin6_scope_id)
+                       return false;
+
+               if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
+                   a6->sin6_port == b6->sin6_port)
+                       return true;
+               break;
+
+       default:
+               dprintk("%s: unhandled address family: %u\n",
+                       __func__, addr1->sa_family);
+               return false;
+       }
 
-       dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
-                       ntohl(ip_addr), ntohs(port));
+       return false;
+}
 
-       list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
-               if (ds->ds_ip_addr == ip_addr &&
-                   ds->ds_port == port) {
-                       return ds;
+/*
+ * Lookup DS by addresses.  The first matching address returns true.
+ * nfs4_ds_cache_lock is held
+ */
+static struct nfs4_pnfs_ds *
+_data_server_lookup_locked(struct list_head *dsaddrs)
+{
+       struct nfs4_pnfs_ds *ds;
+       struct nfs4_pnfs_ds_addr *da1, *da2;
+
+       list_for_each_entry(da1, dsaddrs, da_node) {
+               list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
+                       list_for_each_entry(da2, &ds->ds_addrs, da_node) {
+                               if (same_sockaddr(
+                                       (struct sockaddr *)&da1->da_addr,
+                                       (struct sockaddr *)&da2->da_addr))
+                                       return ds;
+                       }
                }
        }
        return NULL;
 }
 
 /*
+ * Compare two lists of addresses.
+ */
+static bool
+_data_server_match_all_addrs_locked(struct list_head *dsaddrs1,
+                                   struct list_head *dsaddrs2)
+{
+       struct nfs4_pnfs_ds_addr *da1, *da2;
+       size_t count1 = 0,
+              count2 = 0;
+
+       list_for_each_entry(da1, dsaddrs1, da_node)
+               count1++;
+
+       list_for_each_entry(da2, dsaddrs2, da_node) {
+               bool found = false;
+               count2++;
+               list_for_each_entry(da1, dsaddrs1, da_node) {
+                       if (same_sockaddr((struct sockaddr *)&da1->da_addr,
+                               (struct sockaddr *)&da2->da_addr)) {
+                               found = true;
+                               break;
+                       }
+               }
+               if (!found)
+                       return false;
+       }
+
+       return (count1 == count2);
+}
+
+/*
  * Create an rpc connection to the nfs4_pnfs_ds data server
- * Currently only support IPv4
+ * Currently only supports IPv4 and IPv6 addresses
  */
 static int
 nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
 {
-       struct nfs_client *clp;
-       struct sockaddr_in sin;
+       struct nfs_client *clp = ERR_PTR(-EIO);
+       struct nfs4_pnfs_ds_addr *da;
        int status = 0;
 
-       dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__,
-               ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
+       dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr,
                mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
 
-       sin.sin_family = AF_INET;
-       sin.sin_addr.s_addr = ds->ds_ip_addr;
-       sin.sin_port = ds->ds_port;
+       BUG_ON(list_empty(&ds->ds_addrs));
+
+       list_for_each_entry(da, &ds->ds_addrs, da_node) {
+               dprintk("%s: DS %s: trying address %s\n",
+                       __func__, ds->ds_remotestr, da->da_remotestr);
+
+               clp = nfs4_set_ds_client(mds_srv->nfs_client,
+                                (struct sockaddr *)&da->da_addr,
+                                da->da_addrlen, IPPROTO_TCP);
+               if (!IS_ERR(clp))
+                       break;
+       }
 
-       clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin,
-                                sizeof(sin), IPPROTO_TCP);
        if (IS_ERR(clp)) {
                status = PTR_ERR(clp);
                goto out;
@@ -115,8 +200,8 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
                        goto out_put;
                }
                ds->ds_clp = clp;
-               dprintk("%s [existing] ip=%x, port=%hu\n", __func__,
-                       ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+               dprintk("%s [existing] server=%s\n", __func__,
+                       ds->ds_remotestr);
                goto out;
        }
 
@@ -135,8 +220,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
                goto out_put;
 
        ds->ds_clp = clp;
-       dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr),
-               ntohs(ds->ds_port));
+       dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
 out:
        return status;
 out_put:
@@ -147,12 +231,25 @@ out_put:
 static void
 destroy_ds(struct nfs4_pnfs_ds *ds)
 {
+       struct nfs4_pnfs_ds_addr *da;
+
        dprintk("--> %s\n", __func__);
        ifdebug(FACILITY)
                print_ds(ds);
 
        if (ds->ds_clp)
                nfs_put_client(ds->ds_clp);
+
+       while (!list_empty(&ds->ds_addrs)) {
+               da = list_first_entry(&ds->ds_addrs,
+                                     struct nfs4_pnfs_ds_addr,
+                                     da_node);
+               list_del_init(&da->da_node);
+               kfree(da->da_remotestr);
+               kfree(da);
+       }
+
+       kfree(ds->ds_remotestr);
        kfree(ds);
 }
 
@@ -179,31 +276,96 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
        kfree(dsaddr);
 }
 
+/*
+ * Create a string with a human readable address and port to avoid
+ * complicated setup around many dprinks.
+ */
+static char *
+nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags)
+{
+       struct nfs4_pnfs_ds_addr *da;
+       char *remotestr;
+       size_t len;
+       char *p;
+
+       len = 3;        /* '{', '}' and eol */
+       list_for_each_entry(da, dsaddrs, da_node) {
+               len += strlen(da->da_remotestr) + 1;    /* string plus comma */
+       }
+
+       remotestr = kzalloc(len, gfp_flags);
+       if (!remotestr)
+               return NULL;
+
+       p = remotestr;
+       *(p++) = '{';
+       len--;
+       list_for_each_entry(da, dsaddrs, da_node) {
+               size_t ll = strlen(da->da_remotestr);
+
+               if (ll > len)
+                       goto out_err;
+
+               memcpy(p, da->da_remotestr, ll);
+               p += ll;
+               len -= ll;
+
+               if (len < 1)
+                       goto out_err;
+               (*p++) = ',';
+               len--;
+       }
+       if (len < 2)
+               goto out_err;
+       *(p++) = '}';
+       *p = '\0';
+       return remotestr;
+out_err:
+       kfree(remotestr);
+       return NULL;
+}
+
 static struct nfs4_pnfs_ds *
-nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags)
+nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
 {
-       struct nfs4_pnfs_ds *tmp_ds, *ds;
+       struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
+       char *remotestr;
 
-       ds = kzalloc(sizeof(*tmp_ds), gfp_flags);
+       if (list_empty(dsaddrs)) {
+               dprintk("%s: no addresses defined\n", __func__);
+               goto out;
+       }
+
+       ds = kzalloc(sizeof(*ds), gfp_flags);
        if (!ds)
                goto out;
 
+       /* this is only used for debugging, so it's ok if its NULL */
+       remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
+
        spin_lock(&nfs4_ds_cache_lock);
-       tmp_ds = _data_server_lookup_locked(ip_addr, port);
+       tmp_ds = _data_server_lookup_locked(dsaddrs);
        if (tmp_ds == NULL) {
-               ds->ds_ip_addr = ip_addr;
-               ds->ds_port = port;
+               INIT_LIST_HEAD(&ds->ds_addrs);
+               list_splice_init(dsaddrs, &ds->ds_addrs);
+               ds->ds_remotestr = remotestr;
                atomic_set(&ds->ds_count, 1);
                INIT_LIST_HEAD(&ds->ds_node);
                ds->ds_clp = NULL;
                list_add(&ds->ds_node, &nfs4_data_server_cache);
-               dprintk("%s add new data server ip 0x%x\n", __func__,
-                       ds->ds_ip_addr);
+               dprintk("%s add new data server %s\n", __func__,
+                       ds->ds_remotestr);
        } else {
+               if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs,
+                                                        dsaddrs)) {
+                       dprintk("%s:  multipath address mismatch: %s != %s",
+                               __func__, tmp_ds->ds_remotestr, remotestr);
+               }
+               kfree(remotestr);
                kfree(ds);
                atomic_inc(&tmp_ds->ds_count);
-               dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n",
-                       __func__, tmp_ds->ds_ip_addr,
+               dprintk("%s data server %s found, inc'ed ds_count to %d\n",
+                       __func__, tmp_ds->ds_remotestr,
                        atomic_read(&tmp_ds->ds_count));
                ds = tmp_ds;
        }
@@ -213,18 +375,22 @@ out:
 }
 
 /*
- * Currently only support ipv4, and one multi-path address.
+ * Currently only supports ipv4, ipv6 and one multi-path address.
  */
-static struct nfs4_pnfs_ds *
-decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags)
+static struct nfs4_pnfs_ds_addr *
+decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags)
 {
-       struct nfs4_pnfs_ds *ds = NULL;
-       char *buf;
-       const char *ipend, *pstr;
-       u32 ip_addr, port;
-       int nlen, rlen, i;
+       struct nfs4_pnfs_ds_addr *da = NULL;
+       char *buf, *portstr;
+       u32 port;
+       int nlen, rlen;
        int tmp[2];
        __be32 *p;
+       char *netid, *match_netid;
+       size_t len, match_netid_len;
+       char *startsep = "";
+       char *endsep = "";
+
 
        /* r_netid */
        p = xdr_inline_decode(streamp, 4);
@@ -236,64 +402,123 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_fla
        if (unlikely(!p))
                goto out_err;
 
-       /* Check that netid is "tcp" */
-       if (nlen != 3 ||  memcmp((char *)p, "tcp", 3)) {
-               dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
+       netid = kmalloc(nlen+1, gfp_flags);
+       if (unlikely(!netid))
                goto out_err;
-       }
 
-       /* r_addr */
+       netid[nlen] = '\0';
+       memcpy(netid, p, nlen);
+
+       /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
        p = xdr_inline_decode(streamp, 4);
        if (unlikely(!p))
-               goto out_err;
+               goto out_free_netid;
        rlen = be32_to_cpup(p);
 
        p = xdr_inline_decode(streamp, rlen);
        if (unlikely(!p))
-               goto out_err;
+               goto out_free_netid;
 
-       /* ipv6 length plus port is legal */
-       if (rlen > INET6_ADDRSTRLEN + 8) {
+       /* port is ".ABC.DEF", 8 chars max */
+       if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) {
                dprintk("%s: Invalid address, length %d\n", __func__,
                        rlen);
-               goto out_err;
+               goto out_free_netid;
        }
        buf = kmalloc(rlen + 1, gfp_flags);
        if (!buf) {
                dprintk("%s: Not enough memory\n", __func__);
-               goto out_err;
+               goto out_free_netid;
        }
        buf[rlen] = '\0';
        memcpy(buf, p, rlen);
 
-       /* replace the port dots with dashes for the in4_pton() delimiter*/
-       for (i = 0; i < 2; i++) {
-               char *res = strrchr(buf, '.');
-               if (!res) {
-                       dprintk("%s: Failed finding expected dots in port\n",
-                               __func__);
-                       goto out_free;
-               }
-               *res = '-';
+       /* replace port '.' with '-' */
+       portstr = strrchr(buf, '.');
+       if (!portstr) {
+               dprintk("%s: Failed finding expected dot in port\n",
+                       __func__);
+               goto out_free_buf;
+       }
+       *portstr = '-';
+
+       /* find '.' between address and port */
+       portstr = strrchr(buf, '.');
+       if (!portstr) {
+               dprintk("%s: Failed finding expected dot between address and "
+                       "port\n", __func__);
+               goto out_free_buf;
        }
+       *portstr = '\0';
 
-       /* Currently only support ipv4 address */
-       if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) {
-               dprintk("%s: Only ipv4 addresses supported\n", __func__);
-               goto out_free;
+       da = kzalloc(sizeof(*da), gfp_flags);
+       if (unlikely(!da))
+               goto out_free_buf;
+
+       INIT_LIST_HEAD(&da->da_node);
+
+       if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr,
+                     sizeof(da->da_addr))) {
+               dprintk("%s: error parsing address %s\n", __func__, buf);
+               goto out_free_da;
        }
 
-       /* port */
-       pstr = ipend;
-       sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]);
+       portstr++;
+       sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]);
        port = htons((tmp[0] << 8) | (tmp[1]));
 
-       ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags);
-       dprintk("%s: Decoded address and port %s\n", __func__, buf);
-out_free:
+       switch (da->da_addr.ss_family) {
+       case AF_INET:
+               ((struct sockaddr_in *)&da->da_addr)->sin_port = port;
+               da->da_addrlen = sizeof(struct sockaddr_in);
+               match_netid = "tcp";
+               match_netid_len = 3;
+               break;
+
+       case AF_INET6:
+               ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
+               da->da_addrlen = sizeof(struct sockaddr_in6);
+               match_netid = "tcp6";
+               match_netid_len = 4;
+               startsep = "[";
+               endsep = "]";
+               break;
+
+       default:
+               dprintk("%s: unsupported address family: %u\n",
+                       __func__, da->da_addr.ss_family);
+               goto out_free_da;
+       }
+
+       if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) {
+               dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n",
+                       __func__, netid, match_netid);
+               goto out_free_da;
+       }
+
+       /* save human readable address */
+       len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
+       da->da_remotestr = kzalloc(len, gfp_flags);
+
+       /* NULL is ok, only used for dprintk */
+       if (da->da_remotestr)
+               snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep,
+                        buf, endsep, ntohs(port));
+
+       dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
        kfree(buf);
+       kfree(netid);
+       return da;
+
+out_free_da:
+       kfree(da);
+out_free_buf:
+       dprintk("%s: Error parsing DS addr: %s\n", __func__, buf);
+       kfree(buf);
+out_free_netid:
+       kfree(netid);
 out_err:
-       return ds;
+       return NULL;
 }
 
 /* Decode opaque device data and return the result */
@@ -310,6 +535,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
        struct xdr_stream stream;
        struct xdr_buf buf;
        struct page *scratch;
+       struct list_head dsaddrs;
+       struct nfs4_pnfs_ds_addr *da;
 
        /* set up xdr stream */
        scratch = alloc_page(gfp_flags);
@@ -386,6 +613,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
                                NFS_SERVER(ino)->nfs_client,
                                &pdev->dev_id);
 
+       INIT_LIST_HEAD(&dsaddrs);
+
        for (i = 0; i < dsaddr->ds_num; i++) {
                int j;
                u32 mp_count;
@@ -395,48 +624,43 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
                        goto out_err_free_deviceid;
 
                mp_count = be32_to_cpup(p); /* multipath count */
-               if (mp_count > 1) {
-                       printk(KERN_WARNING
-                              "%s: Multipath count %d not supported, "
-                              "skipping all greater than 1\n", __func__,
-                               mp_count);
-               }
                for (j = 0; j < mp_count; j++) {
-                       if (j == 0) {
-                               dsaddr->ds_list[i] = decode_and_add_ds(&stream,
-                                       ino, gfp_flags);
-                               if (dsaddr->ds_list[i] == NULL)
-                                       goto out_err_free_deviceid;
-                       } else {
-                               u32 len;
-                               /* skip extra multipath */
-
-                               /* read len, skip */
-                               p = xdr_inline_decode(&stream, 4);
-                               if (unlikely(!p))
-                                       goto out_err_free_deviceid;
-                               len = be32_to_cpup(p);
-
-                               p = xdr_inline_decode(&stream, len);
-                               if (unlikely(!p))
-                                       goto out_err_free_deviceid;
-
-                               /* read len, skip */
-                               p = xdr_inline_decode(&stream, 4);
-                               if (unlikely(!p))
-                                       goto out_err_free_deviceid;
-                               len = be32_to_cpup(p);
-
-                               p = xdr_inline_decode(&stream, len);
-                               if (unlikely(!p))
-                                       goto out_err_free_deviceid;
-                       }
+                       da = decode_ds_addr(&stream, gfp_flags);
+                       if (da)
+                               list_add_tail(&da->da_node, &dsaddrs);
+               }
+               if (list_empty(&dsaddrs)) {
+                       dprintk("%s: no suitable DS addresses found\n",
+                               __func__);
+                       goto out_err_free_deviceid;
+               }
+
+               dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
+               if (!dsaddr->ds_list[i])
+                       goto out_err_drain_dsaddrs;
+
+               /* If DS was already in cache, free ds addrs */
+               while (!list_empty(&dsaddrs)) {
+                       da = list_first_entry(&dsaddrs,
+                                             struct nfs4_pnfs_ds_addr,
+                                             da_node);
+                       list_del_init(&da->da_node);
+                       kfree(da->da_remotestr);
+                       kfree(da);
                }
        }
 
        __free_page(scratch);
        return dsaddr;
 
+out_err_drain_dsaddrs:
+       while (!list_empty(&dsaddrs)) {
+               da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr,
+                                     da_node);
+               list_del_init(&da->da_node);
+               kfree(da->da_remotestr);
+               kfree(da);
+       }
 out_err_free_deviceid:
        nfs4_fl_free_deviceid(dsaddr);
        /* stripe_indicies was part of dsaddr */
@@ -591,13 +815,13 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
 
 static void
 filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
-                              int err, u32 ds_addr)
+                              int err, const char *ds_remotestr)
 {
        u32 *p = (u32 *)&dsaddr->id_node.deviceid;
 
-       printk(KERN_ERR "NFS: data server %x connection error %d."
+       printk(KERN_ERR "NFS: data server %s connection error %d."
                " Deviceid [%x%x%x%x] marked out of use.\n",
-               ds_addr, err, p[0], p[1], p[2], p[3]);
+               ds_remotestr, err, p[0], p[1], p[2], p[3]);
 
        spin_lock(&nfs4_ds_cache_lock);
        dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
@@ -628,7 +852,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
                err = nfs4_ds_connect(s, ds);
                if (err) {
                        filelayout_mark_devid_negative(dsaddr, err,
-                                                      ntohl(ds->ds_ip_addr));
+                                                      ds->ds_remotestr);
                        return NULL;
                }
        }
index 26bece8..079614d 100644 (file)
@@ -80,7 +80,10 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
                            struct nfs_fattr *fattr, struct iattr *sattr,
                            struct nfs4_state *state);
-
+#ifdef CONFIG_NFS_V4_1
+static int nfs41_test_stateid(struct nfs_server *, struct nfs4_state *);
+static int nfs41_free_stateid(struct nfs_server *, struct nfs4_state *);
+#endif
 /* Prevent leaks of NFSv4 errors into userland */
 static int nfs4_map_errors(int err)
 {
@@ -1689,6 +1692,20 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
        return ret;
 }
 
+#if defined(CONFIG_NFS_V4_1)
+static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
+{
+       int status;
+       struct nfs_server *server = NFS_SERVER(state->inode);
+
+       status = nfs41_test_stateid(server, state);
+       if (status == NFS_OK)
+               return 0;
+       nfs41_free_stateid(server, state);
+       return nfs4_open_expired(sp, state);
+}
+#endif
+
 /*
  * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-*
  * fields corresponding to attributes that were used to store the verifier.
@@ -2252,13 +2269,14 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
 static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
                              struct nfs_fsinfo *info)
 {
+       int minor_version = server->nfs_client->cl_minorversion;
        int status = nfs4_lookup_root(server, fhandle, info);
        if ((status == -NFS4ERR_WRONGSEC) && !(server->flags & NFS_MOUNT_SECFLAVOUR))
                /*
                 * A status of -NFS4ERR_WRONGSEC will be mapped to -EPERM
                 * by nfs4_map_errors() as this function exits.
                 */
-               status = nfs4_find_root_sec(server, fhandle, info);
+               status = nfs_v4_minor_ops[minor_version]->find_root_sec(server, fhandle, info);
        if (status == 0)
                status = nfs4_server_capabilities(server, fhandle);
        if (status == 0)
@@ -4441,6 +4459,20 @@ out:
        return err;
 }
 
+#if defined(CONFIG_NFS_V4_1)
+static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request)
+{
+       int status;
+       struct nfs_server *server = NFS_SERVER(state->inode);
+
+       status = nfs41_test_stateid(server, state);
+       if (status == NFS_OK)
+               return 0;
+       nfs41_free_stateid(server, state);
+       return nfs4_lock_expired(state, request);
+}
+#endif
+
 static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
 {
        struct nfs_inode *nfsi = NFS_I(state->inode);
@@ -4779,6 +4811,16 @@ out_inval:
        return -NFS4ERR_INVAL;
 }
 
+static bool
+nfs41_same_server_scope(struct server_scope *a, struct server_scope *b)
+{
+       if (a->server_scope_sz == b->server_scope_sz &&
+           memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0)
+               return true;
+
+       return false;
+}
+
 /*
  * nfs4_proc_exchange_id()
  *
@@ -4821,9 +4863,31 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
                                init_utsname()->domainname,
                                clp->cl_rpcclient->cl_auth->au_flavor);
 
+       res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL);
+       if (unlikely(!res.server_scope))
+               return -ENOMEM;
+
        status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
        if (!status)
                status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
+
+       if (!status) {
+               if (clp->server_scope &&
+                   !nfs41_same_server_scope(clp->server_scope,
+                                            res.server_scope)) {
+                       dprintk("%s: server_scope mismatch detected\n",
+                               __func__);
+                       set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state);
+                       kfree(clp->server_scope);
+                       clp->server_scope = NULL;
+               }
+
+               if (!clp->server_scope)
+                       clp->server_scope = res.server_scope;
+               else
+                       kfree(res.server_scope);
+       }
+
        dprintk("<-- %s status= %d\n", __func__, status);
        return status;
 }
@@ -5704,7 +5768,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
 {
        struct nfs4_layoutreturn *lrp = calldata;
        struct nfs_server *server;
-       struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
+       struct pnfs_layout_hdr *lo = lrp->args.layout;
 
        dprintk("--> %s\n", __func__);
 
@@ -5733,7 +5797,7 @@ static void nfs4_layoutreturn_release(void *calldata)
        struct nfs4_layoutreturn *lrp = calldata;
 
        dprintk("--> %s\n", __func__);
-       put_layout_hdr(NFS_I(lrp->args.inode)->layout);
+       put_layout_hdr(lrp->args.layout);
        kfree(calldata);
        dprintk("<-- %s\n", __func__);
 }
@@ -5901,6 +5965,143 @@ out:
        rpc_put_task(task);
        return status;
 }
+
+static int
+_nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
+                   struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors)
+{
+       struct nfs41_secinfo_no_name_args args = {
+               .style = SECINFO_STYLE_CURRENT_FH,
+       };
+       struct nfs4_secinfo_res res = {
+               .flavors = flavors,
+       };
+       struct rpc_message msg = {
+               .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SECINFO_NO_NAME],
+               .rpc_argp = &args,
+               .rpc_resp = &res,
+       };
+       return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
+}
+
+static int
+nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
+                          struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors)
+{
+       struct nfs4_exception exception = { };
+       int err;
+       do {
+               err = _nfs41_proc_secinfo_no_name(server, fhandle, info, flavors);
+               switch (err) {
+               case 0:
+               case -NFS4ERR_WRONGSEC:
+               case -NFS4ERR_NOTSUPP:
+                       break;
+               default:
+                       err = nfs4_handle_exception(server, err, &exception);
+               }
+       } while (exception.retry);
+       return err;
+}
+
+static int
+nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
+                   struct nfs_fsinfo *info)
+{
+       int err;
+       struct page *page;
+       rpc_authflavor_t flavor;
+       struct nfs4_secinfo_flavors *flavors;
+
+       page = alloc_page(GFP_KERNEL);
+       if (!page) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       flavors = page_address(page);
+       err = nfs41_proc_secinfo_no_name(server, fhandle, info, flavors);
+
+       /*
+        * Fall back on "guess and check" method if
+        * the server doesn't support SECINFO_NO_NAME
+        */
+       if (err == -NFS4ERR_WRONGSEC || err == -NFS4ERR_NOTSUPP) {
+               err = nfs4_find_root_sec(server, fhandle, info);
+               goto out_freepage;
+       }
+       if (err)
+               goto out_freepage;
+
+       flavor = nfs_find_best_sec(flavors);
+       if (err == 0)
+               err = nfs4_lookup_root_sec(server, fhandle, info, flavor);
+
+out_freepage:
+       put_page(page);
+       if (err == -EACCES)
+               return -EPERM;
+out:
+       return err;
+}
+static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state)
+{
+       int status;
+       struct nfs41_test_stateid_args args = {
+               .stateid = &state->stateid,
+       };
+       struct nfs41_test_stateid_res res;
+       struct rpc_message msg = {
+               .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_TEST_STATEID],
+               .rpc_argp = &args,
+               .rpc_resp = &res,
+       };
+       args.seq_args.sa_session = res.seq_res.sr_session = NULL;
+       status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1);
+       return status;
+}
+
+static int nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state)
+{
+       struct nfs4_exception exception = { };
+       int err;
+       do {
+               err = nfs4_handle_exception(server,
+                               _nfs41_test_stateid(server, state),
+                               &exception);
+       } while (exception.retry);
+       return err;
+}
+
+static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *state)
+{
+       int status;
+       struct nfs41_free_stateid_args args = {
+               .stateid = &state->stateid,
+       };
+       struct nfs41_free_stateid_res res;
+       struct rpc_message msg = {
+               .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FREE_STATEID],
+               .rpc_argp = &args,
+               .rpc_resp = &res,
+       };
+
+       args.seq_args.sa_session = res.seq_res.sr_session = NULL;
+       status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1);
+       return status;
+}
+
+static int nfs41_free_stateid(struct nfs_server *server, struct nfs4_state *state)
+{
+       struct nfs4_exception exception = { };
+       int err;
+       do {
+               err = nfs4_handle_exception(server,
+                               _nfs4_free_stateid(server, state),
+                               &exception);
+       } while (exception.retry);
+       return err;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
@@ -5937,8 +6138,8 @@ struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = {
 struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = {
        .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
        .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE,
-       .recover_open   = nfs4_open_expired,
-       .recover_lock   = nfs4_lock_expired,
+       .recover_open   = nfs41_open_expired,
+       .recover_lock   = nfs41_lock_expired,
        .establish_clid = nfs41_init_clientid,
        .get_clid_cred  = nfs4_get_exchange_id_cred,
 };
@@ -5962,6 +6163,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
        .minor_version = 0,
        .call_sync = _nfs4_call_sync,
        .validate_stateid = nfs4_validate_delegation_stateid,
+       .find_root_sec = nfs4_find_root_sec,
        .reboot_recovery_ops = &nfs40_reboot_recovery_ops,
        .nograce_recovery_ops = &nfs40_nograce_recovery_ops,
        .state_renewal_ops = &nfs40_state_renewal_ops,
@@ -5972,6 +6174,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
        .minor_version = 1,
        .call_sync = _nfs4_call_sync_session,
        .validate_stateid = nfs41_validate_delegation_stateid,
+       .find_root_sec = nfs41_find_root_sec,
        .reboot_recovery_ops = &nfs41_reboot_recovery_ops,
        .nograce_recovery_ops = &nfs41_nograce_recovery_ops,
        .state_renewal_ops = &nfs41_state_renewal_ops,
index 7acfe88..72ab97e 100644 (file)
@@ -1643,7 +1643,14 @@ static void nfs4_state_manager(struct nfs_client *clp)
                                goto out_error;
                        }
                        clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
-                       set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
+
+                       if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH,
+                                              &clp->cl_state))
+                               nfs4_state_start_reclaim_nograce(clp);
+                       else
+                               set_bit(NFS4CLNT_RECLAIM_REBOOT,
+                                       &clp->cl_state);
+
                        pnfs_destroy_all_layouts(clp);
                }
 
index e6e8f3b..c191a9b 100644 (file)
@@ -343,6 +343,14 @@ static int nfs4_stat_to_errno(int);
                                1 /* FIXME: opaque lrf_body always empty at the moment */)
 #define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \
                                1 + decode_stateid_maxsz)
+#define encode_secinfo_no_name_maxsz (op_encode_hdr_maxsz + 1)
+#define decode_secinfo_no_name_maxsz decode_secinfo_maxsz
+#define encode_test_stateid_maxsz      (op_encode_hdr_maxsz + 2 + \
+                                        XDR_QUADLEN(NFS4_STATEID_SIZE))
+#define decode_test_stateid_maxsz      (op_decode_hdr_maxsz + 2 + 1)
+#define encode_free_stateid_maxsz      (op_encode_hdr_maxsz + 1 + \
+                                        XDR_QUADLEN(NFS4_STATEID_SIZE))
+#define decode_free_stateid_maxsz      (op_decode_hdr_maxsz + 1)
 #else /* CONFIG_NFS_V4_1 */
 #define encode_sequence_maxsz  0
 #define decode_sequence_maxsz  0
@@ -772,6 +780,26 @@ static int nfs4_stat_to_errno(int);
                                decode_sequence_maxsz + \
                                decode_putfh_maxsz + \
                                decode_layoutreturn_maxsz)
+#define NFS4_enc_secinfo_no_name_sz    (compound_encode_hdr_maxsz + \
+                                       encode_sequence_maxsz + \
+                                       encode_putrootfh_maxsz +\
+                                       encode_secinfo_no_name_maxsz)
+#define NFS4_dec_secinfo_no_name_sz    (compound_decode_hdr_maxsz + \
+                                       decode_sequence_maxsz + \
+                                       decode_putrootfh_maxsz + \
+                                       decode_secinfo_no_name_maxsz)
+#define NFS4_enc_test_stateid_sz       (compound_encode_hdr_maxsz + \
+                                        encode_sequence_maxsz + \
+                                        encode_test_stateid_maxsz)
+#define NFS4_dec_test_stateid_sz       (compound_decode_hdr_maxsz + \
+                                        decode_sequence_maxsz + \
+                                        decode_test_stateid_maxsz)
+#define NFS4_enc_free_stateid_sz       (compound_encode_hdr_maxsz + \
+                                        encode_sequence_maxsz + \
+                                        encode_free_stateid_maxsz)
+#define NFS4_dec_free_stateid_sz       (compound_decode_hdr_maxsz + \
+                                        decode_sequence_maxsz + \
+                                        decode_free_stateid_maxsz)
 
 const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
                                      compound_encode_hdr_maxsz +
@@ -1938,6 +1966,46 @@ encode_layoutreturn(struct xdr_stream *xdr,
        hdr->nops++;
        hdr->replen += decode_layoutreturn_maxsz;
 }
+
+static int
+encode_secinfo_no_name(struct xdr_stream *xdr,
+                      const struct nfs41_secinfo_no_name_args *args,
+                      struct compound_hdr *hdr)
+{
+       __be32 *p;
+       p = reserve_space(xdr, 8);
+       *p++ = cpu_to_be32(OP_SECINFO_NO_NAME);
+       *p++ = cpu_to_be32(args->style);
+       hdr->nops++;
+       hdr->replen += decode_secinfo_no_name_maxsz;
+       return 0;
+}
+
+static void encode_test_stateid(struct xdr_stream *xdr,
+                               struct nfs41_test_stateid_args *args,
+                               struct compound_hdr *hdr)
+{
+       __be32 *p;
+
+       p = reserve_space(xdr, 8 + NFS4_STATEID_SIZE);
+       *p++ = cpu_to_be32(OP_TEST_STATEID);
+       *p++ = cpu_to_be32(1);
+       xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
+       hdr->nops++;
+       hdr->replen += decode_test_stateid_maxsz;
+}
+
+static void encode_free_stateid(struct xdr_stream *xdr,
+                               struct nfs41_free_stateid_args *args,
+                               struct compound_hdr *hdr)
+{
+       __be32 *p;
+       p = reserve_space(xdr, 4 + NFS4_STATEID_SIZE);
+       *p++ = cpu_to_be32(OP_FREE_STATEID);
+       xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
+       hdr->nops++;
+       hdr->replen += decode_free_stateid_maxsz;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 /*
@@ -2790,6 +2858,59 @@ static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req,
        encode_layoutreturn(xdr, args, &hdr);
        encode_nops(&hdr);
 }
+
+/*
+ * Encode SECINFO_NO_NAME request
+ */
+static int nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req,
+                                       struct xdr_stream *xdr,
+                                       struct nfs41_secinfo_no_name_args *args)
+{
+       struct compound_hdr hdr = {
+               .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+       };
+
+       encode_compound_hdr(xdr, req, &hdr);
+       encode_sequence(xdr, &args->seq_args, &hdr);
+       encode_putrootfh(xdr, &hdr);
+       encode_secinfo_no_name(xdr, args, &hdr);
+       encode_nops(&hdr);
+       return 0;
+}
+
+/*
+ *  Encode TEST_STATEID request
+ */
+static void nfs4_xdr_enc_test_stateid(struct rpc_rqst *req,
+                                     struct xdr_stream *xdr,
+                                     struct nfs41_test_stateid_args *args)
+{
+       struct compound_hdr hdr = {
+               .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+       };
+
+       encode_compound_hdr(xdr, req, &hdr);
+       encode_sequence(xdr, &args->seq_args, &hdr);
+       encode_test_stateid(xdr, args, &hdr);
+       encode_nops(&hdr);
+}
+
+/*
+ *  Encode FREE_STATEID request
+ */
+static void nfs4_xdr_enc_free_stateid(struct rpc_rqst *req,
+                                    struct xdr_stream *xdr,
+                                    struct nfs41_free_stateid_args *args)
+{
+       struct compound_hdr hdr = {
+               .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+       };
+
+       encode_compound_hdr(xdr, req, &hdr);
+       encode_sequence(xdr, &args->seq_args, &hdr);
+       encode_free_stateid(xdr, args, &hdr);
+       encode_nops(&hdr);
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
@@ -4977,11 +5098,17 @@ static int decode_exchange_id(struct xdr_stream *xdr,
        if (unlikely(status))
                return status;
 
-       /* Throw away server_scope */
+       /* Save server_scope */
        status = decode_opaque_inline(xdr, &dummy, &dummy_str);
        if (unlikely(status))
                return status;
 
+       if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
+               return -EIO;
+
+       memcpy(res->server_scope->server_scope, dummy_str, dummy);
+       res->server_scope->server_scope_sz = dummy;
+
        /* Throw away Implementation id array */
        status = decode_opaque_inline(xdr, &dummy, &dummy_str);
        if (unlikely(status))
@@ -5322,6 +5449,55 @@ out_overflow:
        print_overflow_msg(__func__, xdr);
        return -EIO;
 }
+
+static int decode_test_stateid(struct xdr_stream *xdr,
+                              struct nfs41_test_stateid_res *res)
+{
+       __be32 *p;
+       int status;
+       int num_res;
+
+       status = decode_op_hdr(xdr, OP_TEST_STATEID);
+       if (status)
+               return status;
+
+       p = xdr_inline_decode(xdr, 4);
+       if (unlikely(!p))
+               goto out_overflow;
+       num_res = be32_to_cpup(p++);
+       if (num_res != 1)
+               goto out;
+
+       p = xdr_inline_decode(xdr, 4);
+       if (unlikely(!p))
+               goto out_overflow;
+       res->status = be32_to_cpup(p++);
+       return res->status;
+out_overflow:
+       print_overflow_msg(__func__, xdr);
+out:
+       return -EIO;
+}
+
+static int decode_free_stateid(struct xdr_stream *xdr,
+                              struct nfs41_free_stateid_res *res)
+{
+       __be32 *p;
+       int status;
+
+       status = decode_op_hdr(xdr, OP_FREE_STATEID);
+       if (status)
+               return status;
+
+       p = xdr_inline_decode(xdr, 4);
+       if (unlikely(!p))
+               goto out_overflow;
+       res->status = be32_to_cpup(p++);
+       return res->status;
+out_overflow:
+       print_overflow_msg(__func__, xdr);
+       return -EIO;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 /*
@@ -6461,6 +6637,72 @@ static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
 out:
        return status;
 }
+
+/*
+ * Decode SECINFO_NO_NAME response
+ */
+static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp,
+                                       struct xdr_stream *xdr,
+                                       struct nfs4_secinfo_res *res)
+{
+       struct compound_hdr hdr;
+       int status;
+
+       status = decode_compound_hdr(xdr, &hdr);
+       if (status)
+               goto out;
+       status = decode_sequence(xdr, &res->seq_res, rqstp);
+       if (status)
+               goto out;
+       status = decode_putrootfh(xdr);
+       if (status)
+               goto out;
+       status = decode_secinfo(xdr, res);
+out:
+       return status;
+}
+
+/*
+ * Decode TEST_STATEID response
+ */
+static int nfs4_xdr_dec_test_stateid(struct rpc_rqst *rqstp,
+                                    struct xdr_stream *xdr,
+                                    struct nfs41_test_stateid_res *res)
+{
+       struct compound_hdr hdr;
+       int status;
+
+       status = decode_compound_hdr(xdr, &hdr);
+       if (status)
+               goto out;
+       status = decode_sequence(xdr, &res->seq_res, rqstp);
+       if (status)
+               goto out;
+       status = decode_test_stateid(xdr, res);
+out:
+       return status;
+}
+
+/*
+ * Decode FREE_STATEID response
+ */
+static int nfs4_xdr_dec_free_stateid(struct rpc_rqst *rqstp,
+                                    struct xdr_stream *xdr,
+                                    struct nfs41_free_stateid_res *res)
+{
+       struct compound_hdr hdr;
+       int status;
+
+       status = decode_compound_hdr(xdr, &hdr);
+       if (status)
+               goto out;
+       status = decode_sequence(xdr, &res->seq_res, rqstp);
+       if (status)
+               goto out;
+       status = decode_free_stateid(xdr, res);
+out:
+       return status;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 /**
@@ -6663,6 +6905,9 @@ struct rpc_procinfo       nfs4_procedures[] = {
        PROC(LAYOUTGET,         enc_layoutget,          dec_layoutget),
        PROC(LAYOUTCOMMIT,      enc_layoutcommit,       dec_layoutcommit),
        PROC(LAYOUTRETURN,      enc_layoutreturn,       dec_layoutreturn),
+       PROC(SECINFO_NO_NAME,   enc_secinfo_no_name,    dec_secinfo_no_name),
+       PROC(TEST_STATEID,      enc_test_stateid,       dec_test_stateid),
+       PROC(FREE_STATEID,      enc_free_stateid,       dec_free_stateid),
 #endif /* CONFIG_NFS_V4_1 */
 };
 
index 8ff2ea3..9383ca7 100644 (file)
@@ -1000,13 +1000,22 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
        if (!pnfs_generic_pg_test(pgio, prev, req))
                return false;
 
-       if (pgio->pg_lseg == NULL)
-               return true;
-
        return pgio->pg_count + req->wb_bytes <=
                        OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
 }
 
+static const struct nfs_pageio_ops objio_pg_read_ops = {
+       .pg_init = pnfs_generic_pg_init_read,
+       .pg_test = objio_pg_test,
+       .pg_doio = pnfs_generic_pg_readpages,
+};
+
+static const struct nfs_pageio_ops objio_pg_write_ops = {
+       .pg_init = pnfs_generic_pg_init_write,
+       .pg_test = objio_pg_test,
+       .pg_doio = pnfs_generic_pg_writepages,
+};
+
 static struct pnfs_layoutdriver_type objlayout_type = {
        .id = LAYOUT_OSD2_OBJECTS,
        .name = "LAYOUT_OSD2_OBJECTS",
@@ -1020,7 +1029,8 @@ static struct pnfs_layoutdriver_type objlayout_type = {
 
        .read_pagelist           = objlayout_read_pagelist,
        .write_pagelist          = objlayout_write_pagelist,
-       .pg_test                 = objio_pg_test,
+       .pg_read_ops             = &objio_pg_read_ops,
+       .pg_write_ops            = &objio_pg_write_ops,
 
        .free_deviceid_node      = objio_free_deviceid_node,
 
@@ -1055,5 +1065,7 @@ objlayout_exit(void)
               __func__);
 }
 
+MODULE_ALIAS("nfs-layouttype4-2");
+
 module_init(objlayout_init);
 module_exit(objlayout_exit);
index 18449f4..b60970c 100644 (file)
@@ -230,7 +230,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
  */
 void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
                     struct inode *inode,
-                    int (*doio)(struct nfs_pageio_descriptor *),
+                    const struct nfs_pageio_ops *pg_ops,
                     size_t bsize,
                     int io_flags)
 {
@@ -240,13 +240,12 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
        desc->pg_bsize = bsize;
        desc->pg_base = 0;
        desc->pg_moreio = 0;
+       desc->pg_recoalesce = 0;
        desc->pg_inode = inode;
-       desc->pg_doio = doio;
+       desc->pg_ops = pg_ops;
        desc->pg_ioflags = io_flags;
        desc->pg_error = 0;
        desc->pg_lseg = NULL;
-       desc->pg_test = nfs_generic_pg_test;
-       pnfs_pageio_init(desc, inode);
 }
 
 /**
@@ -276,7 +275,7 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
                return false;
        if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
                return false;
-       return pgio->pg_test(pgio, prev, req);
+       return pgio->pg_ops->pg_test(pgio, prev, req);
 }
 
 /**
@@ -297,6 +296,8 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
                if (!nfs_can_coalesce_requests(prev, req, desc))
                        return 0;
        } else {
+               if (desc->pg_ops->pg_init)
+                       desc->pg_ops->pg_init(desc, req);
                desc->pg_base = req->wb_pgbase;
        }
        nfs_list_remove_request(req);
@@ -311,7 +312,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
 static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
 {
        if (!list_empty(&desc->pg_list)) {
-               int error = desc->pg_doio(desc);
+               int error = desc->pg_ops->pg_doio(desc);
                if (error < 0)
                        desc->pg_error = error;
                else
@@ -331,7 +332,7 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
  * Returns true if the request 'req' was successfully coalesced into the
  * existing list of pages 'desc'.
  */
-int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
+static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
                           struct nfs_page *req)
 {
        while (!nfs_pageio_do_add_request(desc, req)) {
@@ -340,17 +341,67 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
                if (desc->pg_error < 0)
                        return 0;
                desc->pg_moreio = 0;
+               if (desc->pg_recoalesce)
+                       return 0;
        }
        return 1;
 }
 
+static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
+{
+       LIST_HEAD(head);
+
+       do {
+               list_splice_init(&desc->pg_list, &head);
+               desc->pg_bytes_written -= desc->pg_count;
+               desc->pg_count = 0;
+               desc->pg_base = 0;
+               desc->pg_recoalesce = 0;
+
+               while (!list_empty(&head)) {
+                       struct nfs_page *req;
+
+                       req = list_first_entry(&head, struct nfs_page, wb_list);
+                       nfs_list_remove_request(req);
+                       if (__nfs_pageio_add_request(desc, req))
+                               continue;
+                       if (desc->pg_error < 0)
+                               return 0;
+                       break;
+               }
+       } while (desc->pg_recoalesce);
+       return 1;
+}
+
+int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
+               struct nfs_page *req)
+{
+       int ret;
+
+       do {
+               ret = __nfs_pageio_add_request(desc, req);
+               if (ret)
+                       break;
+               if (desc->pg_error < 0)
+                       break;
+               ret = nfs_do_recoalesce(desc);
+       } while (ret);
+       return ret;
+}
+
 /**
  * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
  * @desc: pointer to io descriptor
  */
 void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
 {
-       nfs_pageio_doio(desc);
+       for (;;) {
+               nfs_pageio_doio(desc);
+               if (!desc->pg_recoalesce)
+                       break;
+               if (!nfs_do_recoalesce(desc))
+                       break;
+       }
 }
 
 /**
@@ -369,7 +420,7 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
        if (!list_empty(&desc->pg_list)) {
                struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev);
                if (index != prev->wb_index + 1)
-                       nfs_pageio_doio(desc);
+                       nfs_pageio_complete(desc);
        }
 }
 
index 29c0ca7..38e5508 100644 (file)
@@ -28,6 +28,7 @@
  */
 
 #include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
 #include "internal.h"
 #include "pnfs.h"
 #include "iostat.h"
@@ -448,11 +449,20 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
 void
 pnfs_destroy_all_layouts(struct nfs_client *clp)
 {
+       struct nfs_server *server;
        struct pnfs_layout_hdr *lo;
        LIST_HEAD(tmp_list);
 
+       nfs4_deviceid_mark_client_invalid(clp);
+       nfs4_deviceid_purge_client(clp);
+
        spin_lock(&clp->cl_lock);
-       list_splice_init(&clp->cl_layouts, &tmp_list);
+       rcu_read_lock();
+       list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+               if (!list_empty(&server->layouts))
+                       list_splice_init(&server->layouts, &tmp_list);
+       }
+       rcu_read_unlock();
        spin_unlock(&clp->cl_lock);
 
        while (!list_empty(&tmp_list)) {
@@ -661,6 +671,7 @@ _pnfs_return_layout(struct inode *ino)
        lrp->args.stateid = stateid;
        lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
        lrp->args.inode = ino;
+       lrp->args.layout = lo;
        lrp->clp = NFS_SERVER(ino)->nfs_client;
 
        status = nfs4_proc_layoutreturn(lrp);
@@ -920,7 +931,8 @@ pnfs_update_layout(struct inode *ino,
        };
        unsigned pg_offset;
        struct nfs_inode *nfsi = NFS_I(ino);
-       struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
+       struct nfs_server *server = NFS_SERVER(ino);
+       struct nfs_client *clp = server->nfs_client;
        struct pnfs_layout_hdr *lo;
        struct pnfs_layout_segment *lseg = NULL;
        bool first = false;
@@ -964,7 +976,7 @@ pnfs_update_layout(struct inode *ino,
                 */
                spin_lock(&clp->cl_lock);
                BUG_ON(!list_empty(&lo->plh_layouts));
-               list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
+               list_add_tail(&lo->plh_layouts, &server->layouts);
                spin_unlock(&clp->cl_lock);
        }
 
@@ -973,7 +985,8 @@ pnfs_update_layout(struct inode *ino,
                arg.offset -= pg_offset;
                arg.length += pg_offset;
        }
-       arg.length = PAGE_CACHE_ALIGN(arg.length);
+       if (arg.length != NFS4_MAX_UINT64)
+               arg.length = PAGE_CACHE_ALIGN(arg.length);
 
        lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
        if (!lseg && first) {
@@ -991,6 +1004,7 @@ out_unlock:
        spin_unlock(&ino->i_lock);
        goto out;
 }
+EXPORT_SYMBOL_GPL(pnfs_update_layout);
 
 int
 pnfs_layout_process(struct nfs4_layoutget *lgp)
@@ -1048,35 +1062,71 @@ out_forget_reply:
        goto out;
 }
 
+void
+pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+       BUG_ON(pgio->pg_lseg != NULL);
+
+       pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+                                          req->wb_context,
+                                          req_offset(req),
+                                          req->wb_bytes,
+                                          IOMODE_READ,
+                                          GFP_KERNEL);
+       /* If no lseg, fall back to read through mds */
+       if (pgio->pg_lseg == NULL)
+               nfs_pageio_reset_read_mds(pgio);
+
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
+
+void
+pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+       BUG_ON(pgio->pg_lseg != NULL);
+
+       pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+                                          req->wb_context,
+                                          req_offset(req),
+                                          req->wb_bytes,
+                                          IOMODE_RW,
+                                          GFP_NOFS);
+       /* If no lseg, fall back to write through mds */
+       if (pgio->pg_lseg == NULL)
+               nfs_pageio_reset_write_mds(pgio);
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
+
 bool
-pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
-                    struct nfs_page *req)
+pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
 {
-       enum pnfs_iomode access_type;
-       gfp_t gfp_flags;
+       struct nfs_server *server = NFS_SERVER(inode);
+       struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
 
-       /* We assume that pg_ioflags == 0 iff we're reading a page */
-       if (pgio->pg_ioflags == 0) {
-               access_type = IOMODE_READ;
-               gfp_flags = GFP_KERNEL;
-       } else {
-               access_type = IOMODE_RW;
-               gfp_flags = GFP_NOFS;
-       }
+       if (ld == NULL)
+               return false;
+       nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0);
+       return true;
+}
 
-       if (pgio->pg_lseg == NULL) {
-               if (pgio->pg_count != prev->wb_bytes)
-                       return true;
-               /* This is first coelesce call for a series of nfs_pages */
-               pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
-                                                  prev->wb_context,
-                                                  req_offset(prev),
-                                                  pgio->pg_count,
-                                                  access_type,
-                                                  gfp_flags);
-               if (pgio->pg_lseg == NULL)
-                       return true;
-       }
+bool
+pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
+{
+       struct nfs_server *server = NFS_SERVER(inode);
+       struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
+
+       if (ld == NULL)
+               return false;
+       nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags);
+       return true;
+}
+
+bool
+pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
+                    struct nfs_page *req)
+{
+       if (pgio->pg_lseg == NULL)
+               return nfs_generic_pg_test(pgio, prev, req);
 
        /*
         * Test if a nfs_page is fully contained in the pnfs_layout_range.
@@ -1120,15 +1170,30 @@ pnfs_ld_write_done(struct nfs_write_data *data)
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
 
-enum pnfs_try_status
+static void
+pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
+               struct nfs_write_data *data)
+{
+       list_splice_tail_init(&data->pages, &desc->pg_list);
+       if (data->req && list_empty(&data->req->wb_list))
+               nfs_list_add_request(data->req, &desc->pg_list);
+       nfs_pageio_reset_write_mds(desc);
+       desc->pg_recoalesce = 1;
+       nfs_writedata_release(data);
+}
+
+static enum pnfs_try_status
 pnfs_try_to_write_data(struct nfs_write_data *wdata,
-                       const struct rpc_call_ops *call_ops, int how)
+                       const struct rpc_call_ops *call_ops,
+                       struct pnfs_layout_segment *lseg,
+                       int how)
 {
        struct inode *inode = wdata->inode;
        enum pnfs_try_status trypnfs;
        struct nfs_server *nfss = NFS_SERVER(inode);
 
        wdata->mds_ops = call_ops;
+       wdata->lseg = get_lseg(lseg);
 
        dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
                inode->i_ino, wdata->args.count, wdata->args.offset, how);
@@ -1144,6 +1209,44 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
        return trypnfs;
 }
 
+static void
+pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how)
+{
+       struct nfs_write_data *data;
+       const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
+       struct pnfs_layout_segment *lseg = desc->pg_lseg;
+
+       desc->pg_lseg = NULL;
+       while (!list_empty(head)) {
+               enum pnfs_try_status trypnfs;
+
+               data = list_entry(head->next, struct nfs_write_data, list);
+               list_del_init(&data->list);
+
+               trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
+               if (trypnfs == PNFS_NOT_ATTEMPTED)
+                       pnfs_write_through_mds(desc, data);
+       }
+       put_lseg(lseg);
+}
+
+int
+pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
+{
+       LIST_HEAD(head);
+       int ret;
+
+       ret = nfs_generic_flush(desc, &head);
+       if (ret != 0) {
+               put_lseg(desc->pg_lseg);
+               desc->pg_lseg = NULL;
+               return ret;
+       }
+       pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
+
 /*
  * Called by non rpc-based layout drivers
  */
@@ -1167,18 +1270,32 @@ pnfs_ld_read_done(struct nfs_read_data *data)
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
 
+static void
+pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
+               struct nfs_read_data *data)
+{
+       list_splice_tail_init(&data->pages, &desc->pg_list);
+       if (data->req && list_empty(&data->req->wb_list))
+               nfs_list_add_request(data->req, &desc->pg_list);
+       nfs_pageio_reset_read_mds(desc);
+       desc->pg_recoalesce = 1;
+       nfs_readdata_release(data);
+}
+
 /*
  * Call the appropriate parallel I/O subsystem read function.
  */
-enum pnfs_try_status
+static enum pnfs_try_status
 pnfs_try_to_read_data(struct nfs_read_data *rdata,
-                      const struct rpc_call_ops *call_ops)
+                      const struct rpc_call_ops *call_ops,
+                      struct pnfs_layout_segment *lseg)
 {
        struct inode *inode = rdata->inode;
        struct nfs_server *nfss = NFS_SERVER(inode);
        enum pnfs_try_status trypnfs;
 
        rdata->mds_ops = call_ops;
+       rdata->lseg = get_lseg(lseg);
 
        dprintk("%s: Reading ino:%lu %u@%llu\n",
                __func__, inode->i_ino, rdata->args.count, rdata->args.offset);
@@ -1194,6 +1311,44 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
        return trypnfs;
 }
 
+static void
+pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
+{
+       struct nfs_read_data *data;
+       const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
+       struct pnfs_layout_segment *lseg = desc->pg_lseg;
+
+       desc->pg_lseg = NULL;
+       while (!list_empty(head)) {
+               enum pnfs_try_status trypnfs;
+
+               data = list_entry(head->next, struct nfs_read_data, list);
+               list_del_init(&data->list);
+
+               trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
+               if (trypnfs == PNFS_NOT_ATTEMPTED)
+                       pnfs_read_through_mds(desc, data);
+       }
+       put_lseg(lseg);
+}
+
+int
+pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
+{
+       LIST_HEAD(head);
+       int ret;
+
+       ret = nfs_generic_pagein(desc, &head);
+       if (ret != 0) {
+               put_lseg(desc->pg_lseg);
+               desc->pg_lseg = NULL;
+               return ret;
+       }
+       pnfs_do_multiple_reads(desc, &head);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
+
 /*
  * Currently there is only one (whole file) write lseg.
  */
index 96bf4e6..078670d 100644 (file)
@@ -87,7 +87,8 @@ struct pnfs_layoutdriver_type {
        void (*free_lseg) (struct pnfs_layout_segment *lseg);
 
        /* test for nfs page cache coalescing */
-       bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
+       const struct nfs_pageio_ops *pg_read_ops;
+       const struct nfs_pageio_ops *pg_write_ops;
 
        /* Returns true if layoutdriver wants to divert this request to
         * driver's commit routine.
@@ -148,16 +149,16 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
 /* pnfs.c */
 void get_layout_hdr(struct pnfs_layout_hdr *lo);
 void put_lseg(struct pnfs_layout_segment *lseg);
-struct pnfs_layout_segment *
-pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
-                  loff_t pos, u64 count, enum pnfs_iomode access_type,
-                  gfp_t gfp_flags);
+
+bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
+bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int);
+
 void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
 void unset_pnfs_layoutdriver(struct nfs_server *);
-enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
-                                            const struct rpc_call_ops *, int);
-enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
-                                           const struct rpc_call_ops *);
+void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
+int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
+void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *);
+int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
 bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
 int pnfs_layout_process(struct nfs4_layoutget *lgp);
 void pnfs_free_lseg_list(struct list_head *tmp_list);
@@ -182,6 +183,19 @@ int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
 int _pnfs_return_layout(struct inode *);
 int pnfs_ld_write_done(struct nfs_write_data *);
 int pnfs_ld_read_done(struct nfs_read_data *);
+struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
+                                              struct nfs_open_context *ctx,
+                                              loff_t pos,
+                                              u64 count,
+                                              enum pnfs_iomode iomode,
+                                              gfp_t gfp_flags);
+
+void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp);
+
+/* nfs4_deviceid_flags */
+enum {
+       NFS_DEVICEID_INVALID = 0,       /* set when MDS clientid recalled */
+};
 
 /* pnfs_dev.c */
 struct nfs4_deviceid_node {
@@ -189,13 +203,13 @@ struct nfs4_deviceid_node {
        struct hlist_node               tmpnode;
        const struct pnfs_layoutdriver_type *ld;
        const struct nfs_client         *nfs_client;
+       unsigned long                   flags;
        struct nfs4_deviceid            deviceid;
        atomic_t                        ref;
 };
 
 void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
 struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
-struct nfs4_deviceid_node *nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
 void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
 void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
                             const struct pnfs_layoutdriver_type *,
@@ -293,15 +307,6 @@ static inline int pnfs_return_layout(struct inode *ino)
        return 0;
 }
 
-static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio,
-                                   struct inode *inode)
-{
-       struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
-
-       if (ld)
-               pgio->pg_test = ld->pg_test;
-}
-
 #else  /* CONFIG_NFS_V4_1 */
 
 static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
@@ -322,28 +327,6 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg)
 {
 }
 
-static inline struct pnfs_layout_segment *
-pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
-                  loff_t pos, u64 count, enum pnfs_iomode access_type,
-                  gfp_t gfp_flags)
-{
-       return NULL;
-}
-
-static inline enum pnfs_try_status
-pnfs_try_to_read_data(struct nfs_read_data *data,
-                     const struct rpc_call_ops *call_ops)
-{
-       return PNFS_NOT_ATTEMPTED;
-}
-
-static inline enum pnfs_try_status
-pnfs_try_to_write_data(struct nfs_write_data *data,
-                      const struct rpc_call_ops *call_ops, int how)
-{
-       return PNFS_NOT_ATTEMPTED;
-}
-
 static inline int pnfs_return_layout(struct inode *ino)
 {
        return 0;
@@ -385,9 +368,14 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
 {
 }
 
-static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio,
-                                   struct inode *inode)
+static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
 {
+       return false;
+}
+
+static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
+{
+       return false;
 }
 
 static inline void
index f0f8e1e..6fda522 100644 (file)
@@ -100,8 +100,8 @@ _find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
 
        rcu_read_lock();
        d = _lookup_deviceid(ld, clp, id, hash);
-       if (d && !atomic_inc_not_zero(&d->ref))
-               d = NULL;
+       if (d != NULL)
+               atomic_inc(&d->ref);
        rcu_read_unlock();
        return d;
 }
@@ -115,15 +115,15 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
 EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
 
 /*
- * Unhash and put deviceid
+ * Remove a deviceid from cache
  *
  * @clp nfs_client associated with deviceid
  * @id the deviceid to unhash
  *
  * @ret the unhashed node, if found and dereferenced to zero, NULL otherwise.
  */
-struct nfs4_deviceid_node *
-nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld,
+void
+nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld,
                         const struct nfs_client *clp, const struct nfs4_deviceid *id)
 {
        struct nfs4_deviceid_node *d;
@@ -134,7 +134,7 @@ nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld,
        rcu_read_unlock();
        if (!d) {
                spin_unlock(&nfs4_deviceid_lock);
-               return NULL;
+               return;
        }
        hlist_del_init_rcu(&d->node);
        spin_unlock(&nfs4_deviceid_lock);
@@ -142,28 +142,7 @@ nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld,
 
        /* balance the initial ref set in pnfs_insert_deviceid */
        if (atomic_dec_and_test(&d->ref))
-               return d;
-
-       return NULL;
-}
-EXPORT_SYMBOL_GPL(nfs4_unhash_put_deviceid);
-
-/*
- * Delete a deviceid from cache
- *
- * @clp struct nfs_client qualifying the deviceid
- * @id deviceid to delete
- */
-void
-nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld,
-                    const struct nfs_client *clp, const struct nfs4_deviceid *id)
-{
-       struct nfs4_deviceid_node *d;
-
-       d = nfs4_unhash_put_deviceid(ld, clp, id);
-       if (!d)
-               return;
-       d->ld->free_deviceid_node(d);
+               d->ld->free_deviceid_node(d);
 }
 EXPORT_SYMBOL_GPL(nfs4_delete_deviceid);
 
@@ -177,6 +156,7 @@ nfs4_init_deviceid_node(struct nfs4_deviceid_node *d,
        INIT_HLIST_NODE(&d->tmpnode);
        d->ld = ld;
        d->nfs_client = nfs_client;
+       d->flags = 0;
        d->deviceid = *id;
        atomic_set(&d->ref, 1);
 }
@@ -221,16 +201,15 @@ EXPORT_SYMBOL_GPL(nfs4_insert_deviceid_node);
  *
  * @d deviceid node to put
  *
- * @ret true iff the node was deleted
+ * return true iff the node was deleted
+ * Note that since the test for d->ref == 0 is sufficient to establish
+ * that the node is no longer hashed in the global device id cache.
  */
 bool
 nfs4_put_deviceid_node(struct nfs4_deviceid_node *d)
 {
-       if (!atomic_dec_and_lock(&d->ref, &nfs4_deviceid_lock))
+       if (!atomic_dec_and_test(&d->ref))
                return false;
-       hlist_del_init_rcu(&d->node);
-       spin_unlock(&nfs4_deviceid_lock);
-       synchronize_rcu();
        d->ld->free_deviceid_node(d);
        return true;
 }
@@ -275,3 +254,22 @@ nfs4_deviceid_purge_client(const struct nfs_client *clp)
        for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++)
                _deviceid_purge_client(clp, h);
 }
+
+/*
+ * Stop use of all deviceids associated with an nfs_client
+ */
+void
+nfs4_deviceid_mark_client_invalid(struct nfs_client *clp)
+{
+       struct nfs4_deviceid_node *d;
+       struct hlist_node *n;
+       int i;
+
+       rcu_read_lock();
+       for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i ++){
+               hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[i], node)
+                       if (d->nfs_client == clp)
+                               set_bit(NFS_DEVICEID_INVALID, &d->flags);
+       }
+       rcu_read_unlock();
+}
index a68679f..2171c04 100644 (file)
@@ -30,8 +30,7 @@
 
 #define NFSDBG_FACILITY                NFSDBG_PAGECACHE
 
-static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc);
-static int nfs_pagein_one(struct nfs_pageio_descriptor *desc);
+static const struct nfs_pageio_ops nfs_pageio_read_ops;
 static const struct rpc_call_ops nfs_read_partial_ops;
 static const struct rpc_call_ops nfs_read_full_ops;
 
@@ -68,7 +67,7 @@ void nfs_readdata_free(struct nfs_read_data *p)
        mempool_free(p, nfs_rdata_mempool);
 }
 
-static void nfs_readdata_release(struct nfs_read_data *rdata)
+void nfs_readdata_release(struct nfs_read_data *rdata)
 {
        put_lseg(rdata->lseg);
        put_nfs_open_context(rdata->args.context);
@@ -113,6 +112,27 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
        }
 }
 
+static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
+               struct inode *inode)
+{
+       nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops,
+                       NFS_SERVER(inode)->rsize, 0);
+}
+
+void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
+{
+       pgio->pg_ops = &nfs_pageio_read_ops;
+       pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
+}
+EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
+
+static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
+               struct inode *inode)
+{
+       if (!pnfs_pageio_init_read(pgio, inode))
+               nfs_pageio_init_read_mds(pgio, inode);
+}
+
 int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
                       struct page *page)
 {
@@ -131,14 +151,9 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
        if (len < PAGE_CACHE_SIZE)
                zero_user_segment(page, len, PAGE_CACHE_SIZE);
 
-       nfs_pageio_init(&pgio, inode, NULL, 0, 0);
-       nfs_list_add_request(new, &pgio.pg_list);
-       pgio.pg_count = len;
-
-       if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
-               nfs_pagein_multi(&pgio);
-       else
-               nfs_pagein_one(&pgio);
+       nfs_pageio_init_read(&pgio, inode);
+       nfs_pageio_add_request(&pgio, new);
+       nfs_pageio_complete(&pgio);
        return 0;
 }
 
@@ -202,17 +217,14 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read);
 /*
  * Set up the NFS read request struct
  */
-static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
-               const struct rpc_call_ops *call_ops,
-               unsigned int count, unsigned int offset,
-               struct pnfs_layout_segment *lseg)
+static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+               unsigned int count, unsigned int offset)
 {
        struct inode *inode = req->wb_context->dentry->d_inode;
 
        data->req         = req;
        data->inode       = inode;
        data->cred        = req->wb_context->cred;
-       data->lseg        = get_lseg(lseg);
 
        data->args.fh     = NFS_FH(inode);
        data->args.offset = req_offset(req) + offset;
@@ -226,14 +238,36 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
        data->res.count   = count;
        data->res.eof     = 0;
        nfs_fattr_init(&data->fattr);
+}
 
-       if (data->lseg &&
-           (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED))
-               return 0;
+static int nfs_do_read(struct nfs_read_data *data,
+               const struct rpc_call_ops *call_ops)
+{
+       struct inode *inode = data->args.context->dentry->d_inode;
 
        return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
 }
 
+static int
+nfs_do_multiple_reads(struct list_head *head,
+               const struct rpc_call_ops *call_ops)
+{
+       struct nfs_read_data *data;
+       int ret = 0;
+
+       while (!list_empty(head)) {
+               int ret2;
+
+               data = list_entry(head->next, struct nfs_read_data, list);
+               list_del_init(&data->list);
+
+               ret2 = nfs_do_read(data, call_ops);
+               if (ret == 0)
+                       ret = ret2;
+       }
+       return ret;
+}
+
 static void
 nfs_async_read_error(struct list_head *head)
 {
@@ -260,20 +294,19 @@ nfs_async_read_error(struct list_head *head)
  * won't see the new data until our attribute cache is updated.  This is more
  * or less conventional NFS client behavior.
  */
-static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
+static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
 {
        struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
        struct page *page = req->wb_page;
        struct nfs_read_data *data;
-       size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes;
+       size_t rsize = desc->pg_bsize, nbytes;
        unsigned int offset;
        int requests = 0;
        int ret = 0;
-       struct pnfs_layout_segment *lseg;
-       LIST_HEAD(list);
 
        nfs_list_remove_request(req);
 
+       offset = 0;
        nbytes = desc->pg_count;
        do {
                size_t len = min(nbytes,rsize);
@@ -281,45 +314,21 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
                data = nfs_readdata_alloc(1);
                if (!data)
                        goto out_bad;
-               list_add(&data->pages, &list);
+               data->pagevec[0] = page;
+               nfs_read_rpcsetup(req, data, len, offset);
+               list_add(&data->list, res);
                requests++;
                nbytes -= len;
+               offset += len;
        } while(nbytes != 0);
        atomic_set(&req->wb_complete, requests);
-
-       BUG_ON(desc->pg_lseg != NULL);
-       lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
-                                 req_offset(req), desc->pg_count,
-                                 IOMODE_READ, GFP_KERNEL);
        ClearPageError(page);
-       offset = 0;
-       nbytes = desc->pg_count;
-       do {
-               int ret2;
-
-               data = list_entry(list.next, struct nfs_read_data, pages);
-               list_del_init(&data->pages);
-
-               data->pagevec[0] = page;
-
-               if (nbytes < rsize)
-                       rsize = nbytes;
-               ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
-                                        rsize, offset, lseg);
-               if (ret == 0)
-                       ret = ret2;
-               offset += rsize;
-               nbytes -= rsize;
-       } while (nbytes != 0);
-       put_lseg(lseg);
-       desc->pg_lseg = NULL;
-
+       desc->pg_rpc_callops = &nfs_read_partial_ops;
        return ret;
-
 out_bad:
-       while (!list_empty(&list)) {
-               data = list_entry(list.next, struct nfs_read_data, pages);
-               list_del(&data->pages);
+       while (!list_empty(res)) {
+               data = list_entry(res->next, struct nfs_read_data, list);
+               list_del(&data->list);
                nfs_readdata_free(data);
        }
        SetPageError(page);
@@ -327,19 +336,19 @@ out_bad:
        return -ENOMEM;
 }
 
-static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
+static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
 {
        struct nfs_page         *req;
        struct page             **pages;
        struct nfs_read_data    *data;
        struct list_head *head = &desc->pg_list;
-       struct pnfs_layout_segment *lseg = desc->pg_lseg;
-       int ret = -ENOMEM;
+       int ret = 0;
 
        data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
                                                     desc->pg_count));
        if (!data) {
                nfs_async_read_error(head);
+               ret = -ENOMEM;
                goto out;
        }
 
@@ -352,19 +361,37 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
                *pages++ = req->wb_page;
        }
        req = nfs_list_entry(data->pages.next);
-       if ((!lseg) && list_is_singular(&data->pages))
-               lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
-                                         req_offset(req), desc->pg_count,
-                                         IOMODE_READ, GFP_KERNEL);
 
-       ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count,
-                               0, lseg);
+       nfs_read_rpcsetup(req, data, desc->pg_count, 0);
+       list_add(&data->list, res);
+       desc->pg_rpc_callops = &nfs_read_full_ops;
 out:
-       put_lseg(lseg);
-       desc->pg_lseg = NULL;
        return ret;
 }
 
+int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head)
+{
+       if (desc->pg_bsize < PAGE_CACHE_SIZE)
+               return nfs_pagein_multi(desc, head);
+       return nfs_pagein_one(desc, head);
+}
+
+static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
+{
+       LIST_HEAD(head);
+       int ret;
+
+       ret = nfs_generic_pagein(desc, &head);
+       if (ret == 0)
+               ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops);
+       return ret;
+}
+
+static const struct nfs_pageio_ops nfs_pageio_read_ops = {
+       .pg_test = nfs_generic_pg_test,
+       .pg_doio = nfs_generic_pg_readpages,
+};
+
 /*
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
@@ -635,8 +662,6 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
                .pgio = &pgio,
        };
        struct inode *inode = mapping->host;
-       struct nfs_server *server = NFS_SERVER(inode);
-       size_t rsize = server->rsize;
        unsigned long npages;
        int ret = -ESTALE;
 
@@ -664,10 +689,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
        if (ret == 0)
                goto read_complete; /* all pages were read */
 
-       if (rsize < PAGE_CACHE_SIZE)
-               nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
-       else
-               nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0);
+       nfs_pageio_init_read(&pgio, inode);
 
        ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
 
index 8d6864c..b2fbbde 100644 (file)
@@ -147,7 +147,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
 
        alias = d_lookup(parent, &data->args.name);
        if (alias != NULL) {
-               int ret = 0;
+               int ret;
                void *devname_garbage = NULL;
 
                /*
@@ -155,14 +155,16 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
                 * the sillyrename information to the aliased dentry.
                 */
                nfs_free_dname(data);
+               ret = nfs_copy_dname(alias, data);
                spin_lock(&alias->d_lock);
-               if (alias->d_inode != NULL &&
+               if (ret == 0 && alias->d_inode != NULL &&
                    !(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
                        devname_garbage = alias->d_fsdata;
                        alias->d_fsdata = data;
                        alias->d_flags |= DCACHE_NFSFS_RENAMED;
                        ret = 1;
-               }
+               } else
+                       ret = 0;
                spin_unlock(&alias->d_lock);
                nfs_dec_sillycount(dir);
                dput(alias);
@@ -171,8 +173,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
                 * point dentry is definitely not a root, so we won't need
                 * that anymore.
                 */
-               if (devname_garbage)
-                       kfree(devname_garbage);
+               kfree(devname_garbage);
                return ret;
        }
        data->dir = igrab(dir);
@@ -204,8 +205,6 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data)
        if (parent == NULL)
                goto out_free;
        dir = parent->d_inode;
-       if (nfs_copy_dname(dentry, data) != 0)
-               goto out_dput;
        /* Non-exclusive lock protects against concurrent lookup() calls */
        spin_lock(&dir->i_lock);
        if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) {
@@ -366,6 +365,8 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata)
        struct nfs_renamedata *data = calldata;
        struct inode *old_dir = data->old_dir;
        struct inode *new_dir = data->new_dir;
+       struct dentry *old_dentry = data->old_dentry;
+       struct dentry *new_dentry = data->new_dentry;
 
        if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) {
                nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client);
@@ -373,12 +374,12 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata)
        }
 
        if (task->tk_status != 0) {
-               nfs_cancel_async_unlink(data->old_dentry);
+               nfs_cancel_async_unlink(old_dentry);
                return;
        }
 
-       nfs_set_verifier(data->old_dentry, nfs_save_change_attribute(old_dir));
-       d_move(data->old_dentry, data->new_dentry);
+       d_drop(old_dentry);
+       d_drop(new_dentry);
 }
 
 /**
@@ -501,6 +502,14 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
  * and only performs the unlink once the last reference to it is put.
  *
  * The final cleanup is done during dentry_iput.
+ *
+ * (Note: NFSv4 is stateful, and has opens, so in theory an NFSv4 server
+ * could take responsibility for keeping open files referenced.  The server
+ * would also need to ensure that opened-but-deleted files were kept over
+ * reboots.  However, we may not assume a server does so.  (RFC 5661
+ * does provide an OPEN4_RESULT_PRESERVE_UNLINKED flag that a server can
+ * use to advertise that it does this; some day we may take advantage of
+ * it.))
  */
 int
 nfs_sillyrename(struct inode *dir, struct dentry *dentry)
@@ -560,6 +569,14 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
        if (error)
                goto out_dput;
 
+       /* populate unlinkdata with the right dname */
+       error = nfs_copy_dname(sdentry,
+                               (struct nfs_unlinkdata *)dentry->d_fsdata);
+       if (error) {
+               nfs_cancel_async_unlink(dentry);
+               goto out_dput;
+       }
+
        /* run the rename task, undo unlink if it fails */
        task = nfs_async_rename(dir, dir, dentry, sdentry);
        if (IS_ERR(task)) {
index 00e3750..b39b37f 100644 (file)
@@ -97,7 +97,7 @@ void nfs_writedata_free(struct nfs_write_data *p)
        mempool_free(p, nfs_wdata_mempool);
 }
 
-static void nfs_writedata_release(struct nfs_write_data *wdata)
+void nfs_writedata_release(struct nfs_write_data *wdata)
 {
        put_lseg(wdata->lseg);
        put_nfs_open_context(wdata->args.context);
@@ -845,11 +845,9 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write);
 /*
  * Set up the argument/result storage required for the RPC call.
  */
-static int nfs_write_rpcsetup(struct nfs_page *req,
+static void nfs_write_rpcsetup(struct nfs_page *req,
                struct nfs_write_data *data,
-               const struct rpc_call_ops *call_ops,
                unsigned int count, unsigned int offset,
-               struct pnfs_layout_segment *lseg,
                int how)
 {
        struct inode *inode = req->wb_context->dentry->d_inode;
@@ -860,7 +858,6 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
        data->req = req;
        data->inode = inode = req->wb_context->dentry->d_inode;
        data->cred = req->wb_context->cred;
-       data->lseg = get_lseg(lseg);
 
        data->args.fh     = NFS_FH(inode);
        data->args.offset = req_offset(req) + offset;
@@ -872,24 +869,51 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
        data->args.context = get_nfs_open_context(req->wb_context);
        data->args.lock_context = req->wb_lock_context;
        data->args.stable  = NFS_UNSTABLE;
-       if (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
-               data->args.stable = NFS_DATA_SYNC;
-               if (!nfs_need_commit(NFS_I(inode)))
-                       data->args.stable = NFS_FILE_SYNC;
+       switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
+       case 0:
+               break;
+       case FLUSH_COND_STABLE:
+               if (nfs_need_commit(NFS_I(inode)))
+                       break;
+       default:
+               data->args.stable = NFS_FILE_SYNC;
        }
 
        data->res.fattr   = &data->fattr;
        data->res.count   = count;
        data->res.verf    = &data->verf;
        nfs_fattr_init(&data->fattr);
+}
 
-       if (data->lseg &&
-           (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED))
-               return 0;
+static int nfs_do_write(struct nfs_write_data *data,
+               const struct rpc_call_ops *call_ops,
+               int how)
+{
+       struct inode *inode = data->args.context->dentry->d_inode;
 
        return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
 }
 
+static int nfs_do_multiple_writes(struct list_head *head,
+               const struct rpc_call_ops *call_ops,
+               int how)
+{
+       struct nfs_write_data *data;
+       int ret = 0;
+
+       while (!list_empty(head)) {
+               int ret2;
+
+               data = list_entry(head->next, struct nfs_write_data, list);
+               list_del_init(&data->list);
+               
+               ret2 = nfs_do_write(data, call_ops, how);
+                if (ret == 0)
+                        ret = ret2;
+       }
+       return ret;
+}
+
 /* If a nfs_flush_* function fails, it should remove reqs from @head and
  * call this on each, which will prepare them to be retried on next
  * writeback using standard nfs.
@@ -907,17 +931,15 @@ static void nfs_redirty_request(struct nfs_page *req)
  * Generate multiple small requests to write out a single
  * contiguous dirty area on one page.
  */
-static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
+static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
 {
        struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
        struct page *page = req->wb_page;
        struct nfs_write_data *data;
-       size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes;
+       size_t wsize = desc->pg_bsize, nbytes;
        unsigned int offset;
        int requests = 0;
        int ret = 0;
-       struct pnfs_layout_segment *lseg;
-       LIST_HEAD(list);
 
        nfs_list_remove_request(req);
 
@@ -927,6 +949,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
                desc->pg_ioflags &= ~FLUSH_COND_STABLE;
 
 
+       offset = 0;
        nbytes = desc->pg_count;
        do {
                size_t len = min(nbytes, wsize);
@@ -934,45 +957,21 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
                data = nfs_writedata_alloc(1);
                if (!data)
                        goto out_bad;
-               list_add(&data->pages, &list);
+               data->pagevec[0] = page;
+               nfs_write_rpcsetup(req, data, wsize, offset, desc->pg_ioflags);
+               list_add(&data->list, res);
                requests++;
                nbytes -= len;
+               offset += len;
        } while (nbytes != 0);
        atomic_set(&req->wb_complete, requests);
-
-       BUG_ON(desc->pg_lseg);
-       lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
-                                 req_offset(req), desc->pg_count,
-                                 IOMODE_RW, GFP_NOFS);
-       ClearPageError(page);
-       offset = 0;
-       nbytes = desc->pg_count;
-       do {
-               int ret2;
-
-               data = list_entry(list.next, struct nfs_write_data, pages);
-               list_del_init(&data->pages);
-
-               data->pagevec[0] = page;
-
-               if (nbytes < wsize)
-                       wsize = nbytes;
-               ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
-                                         wsize, offset, lseg, desc->pg_ioflags);
-               if (ret == 0)
-                       ret = ret2;
-               offset += wsize;
-               nbytes -= wsize;
-       } while (nbytes != 0);
-
-       put_lseg(lseg);
-       desc->pg_lseg = NULL;
+       desc->pg_rpc_callops = &nfs_write_partial_ops;
        return ret;
 
 out_bad:
-       while (!list_empty(&list)) {
-               data = list_entry(list.next, struct nfs_write_data, pages);
-               list_del(&data->pages);
+       while (!list_empty(res)) {
+               data = list_entry(res->next, struct nfs_write_data, list);
+               list_del(&data->list);
                nfs_writedata_free(data);
        }
        nfs_redirty_request(req);
@@ -987,14 +986,13 @@ out_bad:
  * This is the case if nfs_updatepage detects a conflicting request
  * that has been written but not committed.
  */
-static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
+static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
 {
        struct nfs_page         *req;
        struct page             **pages;
        struct nfs_write_data   *data;
        struct list_head *head = &desc->pg_list;
-       struct pnfs_layout_segment *lseg = desc->pg_lseg;
-       int ret;
+       int ret = 0;
 
        data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
                                                      desc->pg_count));
@@ -1016,32 +1014,62 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
                *pages++ = req->wb_page;
        }
        req = nfs_list_entry(data->pages.next);
-       if ((!lseg) && list_is_singular(&data->pages))
-               lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
-                                         req_offset(req), desc->pg_count,
-                                         IOMODE_RW, GFP_NOFS);
 
        if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
            (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
                desc->pg_ioflags &= ~FLUSH_COND_STABLE;
 
        /* Set up the argument struct */
-       ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags);
+       nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags);
+       list_add(&data->list, res);
+       desc->pg_rpc_callops = &nfs_write_full_ops;
 out:
-       put_lseg(lseg); /* Cleans any gotten in ->pg_test */
-       desc->pg_lseg = NULL;
        return ret;
 }
 
-static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
+int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head)
+{
+       if (desc->pg_bsize < PAGE_CACHE_SIZE)
+               return nfs_flush_multi(desc, head);
+       return nfs_flush_one(desc, head);
+}
+
+static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
+{
+       LIST_HEAD(head);
+       int ret;
+
+       ret = nfs_generic_flush(desc, &head);
+       if (ret == 0)
+               ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops,
+                               desc->pg_ioflags);
+       return ret;
+}
+
+static const struct nfs_pageio_ops nfs_pageio_write_ops = {
+       .pg_test = nfs_generic_pg_test,
+       .pg_doio = nfs_generic_pg_writepages,
+};
+
+static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
                                  struct inode *inode, int ioflags)
 {
-       size_t wsize = NFS_SERVER(inode)->wsize;
+       nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops,
+                               NFS_SERVER(inode)->wsize, ioflags);
+}
+
+void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
+{
+       pgio->pg_ops = &nfs_pageio_write_ops;
+       pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
+}
+EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
 
-       if (wsize < PAGE_CACHE_SIZE)
-               nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
-       else
-               nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags);
+static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
+                                 struct inode *inode, int ioflags)
+{
+       if (!pnfs_pageio_init_write(pgio, inode, ioflags))
+               nfs_pageio_init_write_mds(pgio, inode, ioflags);
 }
 
 /*
index 3b8d397..98e5442 100644 (file)
@@ -93,7 +93,7 @@ int omfs_make_empty(struct inode *inode, struct super_block *sb)
 
        memset(bh->b_data, 0, sizeof(struct omfs_inode));
 
-       if (inode->i_mode & S_IFDIR) {
+       if (S_ISDIR(inode->i_mode)) {
                memset(&bh->b_data[OMFS_DIR_START], 0xff,
                        sbi->s_sys_blocksize - OMFS_DIR_START);
        } else
index 739b751..f711921 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -446,74 +446,52 @@ out:
        return error;
 }
 
-SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
+static int chmod_common(struct path *path, umode_t mode)
 {
-       struct inode * inode;
-       struct dentry * dentry;
-       struct file * file;
-       int err = -EBADF;
+       struct inode *inode = path->dentry->d_inode;
        struct iattr newattrs;
+       int error;
 
-       file = fget(fd);
-       if (!file)
-               goto out;
-
-       dentry = file->f_path.dentry;
-       inode = dentry->d_inode;
-
-       audit_inode(NULL, dentry);
-
-       err = mnt_want_write_file(file);
-       if (err)
-               goto out_putf;
+       error = mnt_want_write(path->mnt);
+       if (error)
+               return error;
        mutex_lock(&inode->i_mutex);
-       err = security_path_chmod(dentry, file->f_vfsmnt, mode);
-       if (err)
+       error = security_path_chmod(path->dentry, path->mnt, mode);
+       if (error)
                goto out_unlock;
-       if (mode == (mode_t) -1)
-               mode = inode->i_mode;
        newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
        newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-       err = notify_change(dentry, &newattrs);
+       error = notify_change(path->dentry, &newattrs);
 out_unlock:
        mutex_unlock(&inode->i_mutex);
-       mnt_drop_write(file->f_path.mnt);
-out_putf:
-       fput(file);
-out:
+       mnt_drop_write(path->mnt);
+       return error;
+}
+
+SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
+{
+       struct file * file;
+       int err = -EBADF;
+
+       file = fget(fd);
+       if (file) {
+               audit_inode(NULL, file->f_path.dentry);
+               err = chmod_common(&file->f_path, mode);
+               fput(file);
+       }
        return err;
 }
 
 SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode)
 {
        struct path path;
-       struct inode *inode;
        int error;
-       struct iattr newattrs;
 
        error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
-       if (error)
-               goto out;
-       inode = path.dentry->d_inode;
-
-       error = mnt_want_write(path.mnt);
-       if (error)
-               goto dput_and_out;
-       mutex_lock(&inode->i_mutex);
-       error = security_path_chmod(path.dentry, path.mnt, mode);
-       if (error)
-               goto out_unlock;
-       if (mode == (mode_t) -1)
-               mode = inode->i_mode;
-       newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
-       newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-       error = notify_change(path.dentry, &newattrs);
-out_unlock:
-       mutex_unlock(&inode->i_mutex);
-       mnt_drop_write(path.mnt);
-dput_and_out:
-       path_put(&path);
-out:
+       if (!error) {
+               error = chmod_common(&path, mode);
+               path_put(&path);
+       }
        return error;
 }
 
index 1b7f9af..0e0be1d 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -948,7 +948,7 @@ static const struct dentry_operations pipefs_dentry_operations = {
 
 static struct inode * get_pipe_inode(void)
 {
-       struct inode *inode = new_inode(pipe_mnt->mnt_sb);
+       struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
        struct pipe_inode_info *pipe;
 
        if (!inode)
index f1637f1..9d99131 100644 (file)
@@ -620,8 +620,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
        if (!ent) goto out;
 
        memset(ent, 0, sizeof(struct proc_dir_entry));
-       memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1);
-       ent->name = ((char *) ent) + sizeof(*ent);
+       memcpy(ent->name, fn, len + 1);
        ent->namelen = len;
        ent->mode = mode;
        ent->nlink = nlink;
index 9020ac1..f738024 100644 (file)
@@ -197,15 +197,15 @@ static __net_init int proc_net_ns_init(struct net *net)
        int err;
 
        err = -ENOMEM;
-       netd = kzalloc(sizeof(*netd), GFP_KERNEL);
+       netd = kzalloc(sizeof(*netd) + 4, GFP_KERNEL);
        if (!netd)
                goto out;
 
        netd->data = net;
        netd->nlink = 2;
-       netd->name = "net";
        netd->namelen = 3;
        netd->parent = &proc_root;
+       memcpy(netd->name, "net", 4);
 
        err = -EEXIST;
        net_statd = proc_net_mkdir(net, "stat", netd);
index d6c3b41..9a8a2b7 100644 (file)
@@ -186,13 +186,13 @@ static const struct inode_operations proc_root_inode_operations = {
 struct proc_dir_entry proc_root = {
        .low_ino        = PROC_ROOT_INO, 
        .namelen        = 5, 
-       .name           = "/proc",
        .mode           = S_IFDIR | S_IRUGO | S_IXUGO, 
        .nlink          = 2, 
        .count          = ATOMIC_INIT(1),
        .proc_iops      = &proc_root_inode_operations, 
        .proc_fops      = &proc_root_operations,
        .parent         = &proc_root,
+       .name           = "/proc",
 };
 
 int pid_ns_prepare_proc(struct pid_namespace *ns)
index 5907b49..179f1c3 100644 (file)
@@ -166,8 +166,10 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin)
                         * long as offset isn't at the end of the file then the
                         * offset is data.
                         */
-                       if (offset >= inode->i_size)
-                               return -ENXIO;
+                       if (offset >= inode->i_size) {
+                               retval = -ENXIO;
+                               goto out;
+                       }
                        break;
                case SEEK_HOLE:
                        /*
@@ -175,8 +177,10 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin)
                         * as long as offset isn't i_size or larger, return
                         * i_size.
                         */
-                       if (offset >= inode->i_size)
-                               return -ENXIO;
+                       if (offset >= inode->i_size) {
+                               retval = -ENXIO;
+                               goto out;
+                       }
                        offset = inode->i_size;
                        break;
        }
index b2b4119..d1fe745 100644 (file)
@@ -1224,6 +1224,9 @@ _xfs_buf_ioapply(
                rw = READ;
        }
 
+       /* we only use the buffer cache for meta-data */
+       rw |= REQ_META;
+
 next_chunk:
        atomic_inc(&bp->b_io_remaining);
        nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
index 825390e..7f7b424 100644 (file)
@@ -149,7 +149,9 @@ xfs_file_fsync(
 
        xfs_iflags_clear(ip, XFS_ITRUNCATED);
 
+       xfs_ilock(ip, XFS_IOLOCK_SHARED);
        xfs_ioend_wait(ip);
+       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
        if (mp->m_flags & XFS_MOUNT_BARRIER) {
                /*
index acca2c5..f7ce7de 100644 (file)
@@ -265,7 +265,7 @@ xfs_open_by_handle(
                return PTR_ERR(filp);
        }
 
-       if (inode->i_mode & S_IFREG) {
+       if (S_ISREG(inode->i_mode)) {
                filp->f_flags |= O_NOATIME;
                filp->f_mode |= FMODE_NOCMTIME;
        }
@@ -850,14 +850,14 @@ xfs_set_diflags(
                di_flags |= XFS_DIFLAG_NODEFRAG;
        if (xflags & XFS_XFLAG_FILESTREAM)
                di_flags |= XFS_DIFLAG_FILESTREAM;
-       if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+       if (S_ISDIR(ip->i_d.di_mode)) {
                if (xflags & XFS_XFLAG_RTINHERIT)
                        di_flags |= XFS_DIFLAG_RTINHERIT;
                if (xflags & XFS_XFLAG_NOSYMLINKS)
                        di_flags |= XFS_DIFLAG_NOSYMLINKS;
                if (xflags & XFS_XFLAG_EXTSZINHERIT)
                        di_flags |= XFS_DIFLAG_EXTSZINHERIT;
-       } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
+       } else if (S_ISREG(ip->i_d.di_mode)) {
                if (xflags & XFS_XFLAG_REALTIME)
                        di_flags |= XFS_DIFLAG_REALTIME;
                if (xflags & XFS_XFLAG_EXTSIZE)
index 6544c32..b9c172b 100644 (file)
@@ -1194,9 +1194,14 @@ xfs_setup_inode(
                break;
        }
 
-       /* if there is no attribute fork no ACL can exist on this inode */
-       if (!XFS_IFORK_Q(ip))
+       /*
+        * If there is no attribute fork no ACL can exist on this inode,
+        * and it can't have any file capabilities attached to it either.
+        */
+       if (!XFS_IFORK_Q(ip)) {
+               inode_has_no_xattr(inode);
                cache_no_acl(inode);
+       }
 
        xfs_iflags_clear(ip, XFS_INEW);
        barrier();
index c51a3f9..ab3e5c6 100644 (file)
@@ -414,7 +414,7 @@ xfs_bmap_add_attrfork_local(
 
        if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
                return 0;
-       if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+       if (S_ISDIR(ip->i_d.di_mode)) {
                mp = ip->i_mount;
                memset(&dargs, 0, sizeof(dargs));
                dargs.dp = ip;
@@ -3344,8 +3344,7 @@ xfs_bmap_local_to_extents(
         * We don't want to deal with the case of keeping inode data inline yet.
         * So sending the data fork of a regular inode is invalid.
         */
-       ASSERT(!((ip->i_d.di_mode & S_IFMT) == S_IFREG &&
-                whichfork == XFS_DATA_FORK));
+       ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK));
        ifp = XFS_IFORK_PTR(ip, whichfork);
        ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
        flags = 0;
@@ -4052,7 +4051,7 @@ xfs_bmap_one_block(
 
 #ifndef DEBUG
        if (whichfork == XFS_DATA_FORK) {
-               return ((ip->i_d.di_mode & S_IFMT) == S_IFREG) ?
+               return S_ISREG(ip->i_d.di_mode) ?
                        (ip->i_size == ip->i_mount->m_sb.sb_blocksize) :
                        (ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize);
        }
index 2925726..5bfcb87 100644 (file)
@@ -692,6 +692,24 @@ xfs_da_join(xfs_da_state_t *state)
        return(error);
 }
 
+#ifdef DEBUG
+static void
+xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level)
+{
+       __be16  magic = blkinfo->magic;
+
+       if (level == 1) {
+               ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
+                      magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+       } else
+               ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+       ASSERT(!blkinfo->forw);
+       ASSERT(!blkinfo->back);
+}
+#else  /* !DEBUG */
+#define        xfs_da_blkinfo_onlychild_validate(blkinfo, level)
+#endif /* !DEBUG */
+
 /*
  * We have only one entry in the root.  Copy the only remaining child of
  * the old root to block 0 as the new root node.
@@ -700,8 +718,6 @@ STATIC int
 xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
 {
        xfs_da_intnode_t *oldroot;
-       /* REFERENCED */
-       xfs_da_blkinfo_t *blkinfo;
        xfs_da_args_t *args;
        xfs_dablk_t child;
        xfs_dabuf_t *bp;
@@ -732,15 +748,9 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
        if (error)
                return(error);
        ASSERT(bp != NULL);
-       blkinfo = bp->data;
-       if (be16_to_cpu(oldroot->hdr.level) == 1) {
-               ASSERT(blkinfo->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
-                      blkinfo->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
-       } else {
-               ASSERT(blkinfo->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
-       }
-       ASSERT(!blkinfo->forw);
-       ASSERT(!blkinfo->back);
+       xfs_da_blkinfo_onlychild_validate(bp->data,
+                                       be16_to_cpu(oldroot->hdr.level));
+
        memcpy(root_blk->bp->data, bp->data, state->blocksize);
        xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
        error = xfs_da_shrink_inode(args, child, bp);
index 4580ce0..a2e2701 100644 (file)
@@ -121,7 +121,7 @@ xfs_dir_isempty(
 {
        xfs_dir2_sf_hdr_t       *sfp;
 
-       ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+       ASSERT(S_ISDIR(dp->i_d.di_mode));
        if (dp->i_d.di_size == 0)       /* might happen during shutdown. */
                return 1;
        if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp))
@@ -179,7 +179,7 @@ xfs_dir_init(
        memset((char *)&args, 0, sizeof(args));
        args.dp = dp;
        args.trans = tp;
-       ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+       ASSERT(S_ISDIR(dp->i_d.di_mode));
        if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino)))
                return error;
        return xfs_dir2_sf_create(&args, pdp->i_ino);
@@ -202,7 +202,7 @@ xfs_dir_createname(
        int                     rval;
        int                     v;              /* type-checking value */
 
-       ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+       ASSERT(S_ISDIR(dp->i_d.di_mode));
        if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
                return rval;
        XFS_STATS_INC(xs_dir_create);
@@ -278,7 +278,7 @@ xfs_dir_lookup(
        int             rval;
        int             v;              /* type-checking value */
 
-       ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+       ASSERT(S_ISDIR(dp->i_d.di_mode));
        XFS_STATS_INC(xs_dir_lookup);
 
        memset(&args, 0, sizeof(xfs_da_args_t));
@@ -333,7 +333,7 @@ xfs_dir_removename(
        int             rval;
        int             v;              /* type-checking value */
 
-       ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+       ASSERT(S_ISDIR(dp->i_d.di_mode));
        XFS_STATS_INC(xs_dir_remove);
 
        memset(&args, 0, sizeof(xfs_da_args_t));
@@ -382,7 +382,7 @@ xfs_readdir(
        if (XFS_FORCED_SHUTDOWN(dp->i_mount))
                return XFS_ERROR(EIO);
 
-       ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+       ASSERT(S_ISDIR(dp->i_d.di_mode));
        XFS_STATS_INC(xs_dir_getdents);
 
        if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
@@ -414,7 +414,7 @@ xfs_dir_replace(
        int             rval;
        int             v;              /* type-checking value */
 
-       ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+       ASSERT(S_ISDIR(dp->i_d.di_mode));
 
        if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
                return rval;
@@ -464,7 +464,7 @@ xfs_dir_canenter(
        if (resblks)
                return 0;
 
-       ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+       ASSERT(S_ISDIR(dp->i_d.di_mode));
 
        memset(&args, 0, sizeof(xfs_da_args_t));
        args.name = name->name;
index 9124425..3ff3d9e 100644 (file)
@@ -344,9 +344,9 @@ _xfs_filestream_update_ag(
         * Either ip is a regular file and pip is a directory, or ip is a
         * directory and pip is NULL.
         */
-       ASSERT(ip && (((ip->i_d.di_mode & S_IFREG) && pip &&
-                      (pip->i_d.di_mode & S_IFDIR)) ||
-                     ((ip->i_d.di_mode & S_IFDIR) && !pip)));
+       ASSERT(ip && ((S_ISREG(ip->i_d.di_mode) && pip &&
+                      S_ISDIR(pip->i_d.di_mode)) ||
+                     (S_ISDIR(ip->i_d.di_mode) && !pip)));
 
        mp = ip->i_mount;
        cache = mp->m_filestream;
@@ -537,7 +537,7 @@ xfs_filestream_lookup_ag(
        xfs_agnumber_t  ag;
        int             ref;
 
-       if (!(ip->i_d.di_mode & (S_IFREG | S_IFDIR))) {
+       if (!S_ISREG(ip->i_d.di_mode) && !S_ISDIR(ip->i_d.di_mode)) {
                ASSERT(0);
                return NULLAGNUMBER;
        }
@@ -579,9 +579,9 @@ xfs_filestream_associate(
        xfs_agnumber_t  ag, rotorstep, startag;
        int             err = 0;
 
-       ASSERT(pip->i_d.di_mode & S_IFDIR);
-       ASSERT(ip->i_d.di_mode & S_IFREG);
-       if (!(pip->i_d.di_mode & S_IFDIR) || !(ip->i_d.di_mode & S_IFREG))
+       ASSERT(S_ISDIR(pip->i_d.di_mode));
+       ASSERT(S_ISREG(ip->i_d.di_mode));
+       if (!S_ISDIR(pip->i_d.di_mode) || !S_ISREG(ip->i_d.di_mode))
                return -EINVAL;
 
        mp = pip->i_mount;
index 3cc21dd..2fcca4b 100644 (file)
@@ -368,7 +368,7 @@ xfs_iformat(
                        /*
                         * no local regular files yet
                         */
-                       if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) {
+                       if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
                                xfs_warn(ip->i_mount,
                        "corrupt inode %Lu (local format for regular file).",
                                        (unsigned long long) ip->i_ino);
@@ -1040,7 +1040,7 @@ xfs_ialloc(
 
        if (pip && XFS_INHERIT_GID(pip)) {
                ip->i_d.di_gid = pip->i_d.di_gid;
-               if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) {
+               if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) {
                        ip->i_d.di_mode |= S_ISGID;
                }
        }
@@ -1097,14 +1097,14 @@ xfs_ialloc(
                if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
                        uint    di_flags = 0;
 
-                       if ((mode & S_IFMT) == S_IFDIR) {
+                       if (S_ISDIR(mode)) {
                                if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
                                        di_flags |= XFS_DIFLAG_RTINHERIT;
                                if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
                                        di_flags |= XFS_DIFLAG_EXTSZINHERIT;
                                        ip->i_d.di_extsize = pip->i_d.di_extsize;
                                }
-                       } else if ((mode & S_IFMT) == S_IFREG) {
+                       } else if (S_ISREG(mode)) {
                                if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
                                        di_flags |= XFS_DIFLAG_REALTIME;
                                if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
@@ -1188,7 +1188,7 @@ xfs_isize_check(
        int                     nimaps;
        xfs_bmbt_irec_t         imaps[2];
 
-       if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
+       if (!S_ISREG(ip->i_d.di_mode))
                return;
 
        if (XFS_IS_REALTIME_INODE(ip))
@@ -1828,7 +1828,7 @@ xfs_ifree(
        ASSERT(ip->i_d.di_nextents == 0);
        ASSERT(ip->i_d.di_anextents == 0);
        ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) ||
-              ((ip->i_d.di_mode & S_IFMT) != S_IFREG));
+              (!S_ISREG(ip->i_d.di_mode)));
        ASSERT(ip->i_d.di_nblocks == 0);
 
        /*
@@ -2671,7 +2671,7 @@ xfs_iflush_int(
                        __func__, ip->i_ino, ip, ip->i_d.di_magic);
                goto corrupt_out;
        }
-       if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
+       if (S_ISREG(ip->i_d.di_mode)) {
                if (XFS_TEST_ERROR(
                    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
                    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
@@ -2681,7 +2681,7 @@ xfs_iflush_int(
                                __func__, ip->i_ino, ip);
                        goto corrupt_out;
                }
-       } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+       } else if (S_ISDIR(ip->i_d.di_mode)) {
                if (XFS_TEST_ERROR(
                    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
                    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
index a97644a..2380a4b 100644 (file)
@@ -263,7 +263,7 @@ typedef struct xfs_inode {
        struct inode            i_vnode;        /* embedded VFS inode */
 } xfs_inode_t;
 
-#define XFS_ISIZE(ip)  (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \
+#define XFS_ISIZE(ip)  S_ISREG((ip)->i_d.di_mode) ? \
                                (ip)->i_size : (ip)->i_d.di_size;
 
 /* Convert from vfs inode to xfs inode */
index 8fe4206..052a2c0 100644 (file)
@@ -2283,7 +2283,7 @@ xlog_recover_inode_pass2(
        /* Take the opportunity to reset the flush iteration count */
        dicp->di_flushiter = 0;
 
-       if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) {
+       if (unlikely(S_ISREG(dicp->di_mode))) {
                if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
                    (dicp->di_format != XFS_DINODE_FMT_BTREE)) {
                        XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
@@ -2296,7 +2296,7 @@ xlog_recover_inode_pass2(
                        error = EFSCORRUPTED;
                        goto error;
                }
-       } else if (unlikely((dicp->di_mode & S_IFMT) == S_IFDIR)) {
+       } else if (unlikely(S_ISDIR(dicp->di_mode))) {
                if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
                    (dicp->di_format != XFS_DINODE_FMT_BTREE) &&
                    (dicp->di_format != XFS_DINODE_FMT_LOCAL)) {
index 7f25245..092e16a 100644 (file)
@@ -1331,7 +1331,7 @@ xfs_mountfs(
 
        ASSERT(rip != NULL);
 
-       if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) {
+       if (unlikely(!S_ISDIR(rip->i_d.di_mode))) {
                xfs_warn(mp, "corrupted root inode %llu: not a directory",
                        (unsigned long long)rip->i_ino);
                xfs_iunlock(rip, XFS_ILOCK_EXCL);
index 77a5989..df78c29 100644 (file)
@@ -116,7 +116,7 @@ xfs_rename(
        trace_xfs_rename(src_dp, target_dp, src_name, target_name);
 
        new_parent = (src_dp != target_dp);
-       src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR);
+       src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
 
        if (src_is_directory) {
                /*
@@ -226,7 +226,7 @@ xfs_rename(
                 * target and source are directories and that target can be
                 * destroyed, or that neither is a directory.
                 */
-               if ((target_ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+               if (S_ISDIR(target_ip->i_d.di_mode)) {
                        /*
                         * Make sure target dir is empty.
                         */
index 88d1214..9322e13 100644 (file)
@@ -121,7 +121,7 @@ xfs_readlink(
 
        xfs_ilock(ip, XFS_ILOCK_SHARED);
 
-       ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK);
+       ASSERT(S_ISLNK(ip->i_d.di_mode));
        ASSERT(ip->i_d.di_size <= MAXPATHLEN);
 
        pathlen = ip->i_d.di_size;
@@ -529,7 +529,7 @@ xfs_release(
        if (ip->i_d.di_nlink == 0)
                return 0;
 
-       if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
+       if ((S_ISREG(ip->i_d.di_mode) &&
             ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
               ip->i_delayed_blks > 0)) &&
             (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
@@ -610,7 +610,7 @@ xfs_inactive(
        truncate = ((ip->i_d.di_nlink == 0) &&
            ((ip->i_d.di_size != 0) || (ip->i_size != 0) ||
             (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) &&
-           ((ip->i_d.di_mode & S_IFMT) == S_IFREG));
+           S_ISREG(ip->i_d.di_mode));
 
        mp = ip->i_mount;
 
@@ -621,7 +621,7 @@ xfs_inactive(
                goto out;
 
        if (ip->i_d.di_nlink != 0) {
-               if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
+               if ((S_ISREG(ip->i_d.di_mode) &&
                      ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
                        ip->i_delayed_blks > 0)) &&
                      (ip->i_df.if_flags & XFS_IFEXTENTS) &&
@@ -669,7 +669,7 @@ xfs_inactive(
                        xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
                        return VN_INACTIVE_CACHE;
                }
-       } else if ((ip->i_d.di_mode & S_IFMT) == S_IFLNK) {
+       } else if (S_ISLNK(ip->i_d.di_mode)) {
 
                /*
                 * If we get an error while cleaning up a
index 9585501..1d45413 100644 (file)
@@ -1,6 +1,11 @@
 /*
  * Copyright (C) 2010 IBM Corporation
- * Author: Mimi Zohar <zohar@us.ibm.com>
+ * Copyright (C) 2010 Politecnico di Torino, Italy
+ *                    TORSEC group -- http://security.polito.it
+ *
+ * Authors:
+ * Mimi Zohar <zohar@us.ibm.com>
+ * Roberto Sassu <roberto.sassu@polito.it>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
 
 struct encrypted_key_payload {
        struct rcu_head rcu;
+       char *format;           /* datablob: format */
        char *master_desc;      /* datablob: master key name */
        char *datalen;          /* datablob: decrypted key length */
        u8 *iv;                 /* datablob: iv */
        u8 *encrypted_data;     /* datablob: encrypted data */
        unsigned short datablob_len;    /* length of datablob */
        unsigned short decrypted_datalen;       /* decrypted data length */
-       u8 decrypted_data[0];   /* decrypted data +  datablob + hmac */
+       unsigned short payload_datalen;         /* payload data length */
+       unsigned short encrypted_key_format;    /* encrypted key format */
+       u8 *decrypted_data;     /* decrypted data */
+       u8 payload_data[0];     /* payload data + datablob + hmac */
 };
 
 extern struct key_type key_type_encrypted;
diff --git a/include/linux/ecryptfs.h b/include/linux/ecryptfs.h
new file mode 100644 (file)
index 0000000..2224a8c
--- /dev/null
@@ -0,0 +1,113 @@
+#ifndef _LINUX_ECRYPTFS_H
+#define _LINUX_ECRYPTFS_H
+
+/* Version verification for shared data structures w/ userspace */
+#define ECRYPTFS_VERSION_MAJOR 0x00
+#define ECRYPTFS_VERSION_MINOR 0x04
+#define ECRYPTFS_SUPPORTED_FILE_VERSION 0x03
+/* These flags indicate which features are supported by the kernel
+ * module; userspace tools such as the mount helper read
+ * ECRYPTFS_VERSIONING_MASK from a sysfs handle in order to determine
+ * how to behave. */
+#define ECRYPTFS_VERSIONING_PASSPHRASE            0x00000001
+#define ECRYPTFS_VERSIONING_PUBKEY                0x00000002
+#define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH 0x00000004
+#define ECRYPTFS_VERSIONING_POLICY                0x00000008
+#define ECRYPTFS_VERSIONING_XATTR                 0x00000010
+#define ECRYPTFS_VERSIONING_MULTKEY               0x00000020
+#define ECRYPTFS_VERSIONING_DEVMISC               0x00000040
+#define ECRYPTFS_VERSIONING_HMAC                  0x00000080
+#define ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION   0x00000100
+#define ECRYPTFS_VERSIONING_GCM                   0x00000200
+#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \
+                                 | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \
+                                 | ECRYPTFS_VERSIONING_PUBKEY \
+                                 | ECRYPTFS_VERSIONING_XATTR \
+                                 | ECRYPTFS_VERSIONING_MULTKEY \
+                                 | ECRYPTFS_VERSIONING_DEVMISC \
+                                 | ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION)
+#define ECRYPTFS_MAX_PASSWORD_LENGTH 64
+#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH
+#define ECRYPTFS_SALT_SIZE 8
+#define ECRYPTFS_SALT_SIZE_HEX (ECRYPTFS_SALT_SIZE*2)
+/* The original signature size is only for what is stored on disk; all
+ * in-memory representations are expanded hex, so it better adapted to
+ * be passed around or referenced on the command line */
+#define ECRYPTFS_SIG_SIZE 8
+#define ECRYPTFS_SIG_SIZE_HEX (ECRYPTFS_SIG_SIZE*2)
+#define ECRYPTFS_PASSWORD_SIG_SIZE ECRYPTFS_SIG_SIZE_HEX
+#define ECRYPTFS_MAX_KEY_BYTES 64
+#define ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES 512
+#define ECRYPTFS_FILE_VERSION 0x03
+#define ECRYPTFS_MAX_PKI_NAME_BYTES 16
+
+#define RFC2440_CIPHER_DES3_EDE 0x02
+#define RFC2440_CIPHER_CAST_5 0x03
+#define RFC2440_CIPHER_BLOWFISH 0x04
+#define RFC2440_CIPHER_AES_128 0x07
+#define RFC2440_CIPHER_AES_192 0x08
+#define RFC2440_CIPHER_AES_256 0x09
+#define RFC2440_CIPHER_TWOFISH 0x0a
+#define RFC2440_CIPHER_CAST_6 0x0b
+
+#define RFC2440_CIPHER_RSA 0x01
+
+/**
+ * For convenience, we may need to pass around the encrypted session
+ * key between kernel and userspace because the authentication token
+ * may not be extractable.  For example, the TPM may not release the
+ * private key, instead requiring the encrypted data and returning the
+ * decrypted data.
+ */
+struct ecryptfs_session_key {
+#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT 0x00000001
+#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT 0x00000002
+#define ECRYPTFS_CONTAINS_DECRYPTED_KEY 0x00000004
+#define ECRYPTFS_CONTAINS_ENCRYPTED_KEY 0x00000008
+       u32 flags;
+       u32 encrypted_key_size;
+       u32 decrypted_key_size;
+       u8 encrypted_key[ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES];
+       u8 decrypted_key[ECRYPTFS_MAX_KEY_BYTES];
+};
+
+struct ecryptfs_password {
+       u32 password_bytes;
+       s32 hash_algo;
+       u32 hash_iterations;
+       u32 session_key_encryption_key_bytes;
+#define ECRYPTFS_PERSISTENT_PASSWORD 0x01
+#define ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET 0x02
+       u32 flags;
+       /* Iterated-hash concatenation of salt and passphrase */
+       u8 session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES];
+       u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1];
+       /* Always in expanded hex */
+       u8 salt[ECRYPTFS_SALT_SIZE];
+};
+
+enum ecryptfs_token_types {ECRYPTFS_PASSWORD, ECRYPTFS_PRIVATE_KEY};
+
+struct ecryptfs_private_key {
+       u32 key_size;
+       u32 data_len;
+       u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1];
+       char pki_type[ECRYPTFS_MAX_PKI_NAME_BYTES + 1];
+       u8 data[];
+};
+
+/* May be a password or a private key */
+struct ecryptfs_auth_tok {
+       u16 version; /* 8-bit major and 8-bit minor */
+       u16 token_type;
+#define ECRYPTFS_ENCRYPT_ONLY 0x00000001
+       u32 flags;
+       struct ecryptfs_session_key session_key;
+       u8 reserved[32];
+       union {
+               struct ecryptfs_password password;
+               struct ecryptfs_private_key private_key;
+       } token;
+} __attribute__ ((packed));
+
+#endif /* _LINUX_ECRYPTFS_H */
index 5f523eb..f23bcb7 100644 (file)
@@ -2310,7 +2310,8 @@ extern void __iget(struct inode * inode);
 extern void iget_failed(struct inode *);
 extern void end_writeback(struct inode *);
 extern void __destroy_inode(struct inode *);
-extern struct inode *new_inode(struct super_block *);
+extern struct inode *new_inode_pseudo(struct super_block *sb);
+extern struct inode *new_inode(struct super_block *sb);
 extern void free_inode_nonrcu(struct inode *inode);
 extern int should_remove_suid(struct dentry *);
 extern int file_remove_suid(struct file *);
index 3bc63e6..03489ca 100644 (file)
@@ -76,6 +76,8 @@
 #define IFF_BRIDGE_PORT        0x4000          /* device used as bridge port */
 #define IFF_OVS_DATAPATH       0x8000  /* device used as Open vSwitch
                                         * datapath port */
+#define IFF_TX_SKB_SHARING     0x10000 /* The interface supports sharing
+                                        * skbs on transmit */
 
 #define IF_GET_IFACE   0x0001          /* for querying only */
 #define IF_GET_PROTO   0x0002
index 771d6d8..068784e 100644 (file)
@@ -119,9 +119,9 @@ struct input_keymap_entry {
 #define EVIOCGSND(len)         _IOC(_IOC_READ, 'E', 0x1a, len)         /* get all sounds status */
 #define EVIOCGSW(len)          _IOC(_IOC_READ, 'E', 0x1b, len)         /* get all switch states */
 
-#define EVIOCGBIT(ev,len)      _IOC(_IOC_READ, 'E', 0x20 + ev, len)    /* get event bits */
-#define EVIOCGABS(abs)         _IOR('E', 0x40 + abs, struct input_absinfo)     /* get abs value/limits */
-#define EVIOCSABS(abs)         _IOW('E', 0xc0 + abs, struct input_absinfo)     /* set abs value/limits */
+#define EVIOCGBIT(ev,len)      _IOC(_IOC_READ, 'E', 0x20 + (ev), len)  /* get event bits */
+#define EVIOCGABS(abs)         _IOR('E', 0x40 + (abs), struct input_absinfo)   /* get abs value/limits */
+#define EVIOCSABS(abs)         _IOW('E', 0xc0 + (abs), struct input_absinfo)   /* set abs value/limits */
 
 #define EVIOCSFF               _IOC(_IOC_WRITE, 'E', 0x80, sizeof(struct ff_effect))   /* send a force effect to a force feedback device */
 #define EVIOCRMFF              _IOW('E', 0x81, int)                    /* Erase a force effect */
diff --git a/include/linux/input/kxtj9.h b/include/linux/input/kxtj9.h
new file mode 100644 (file)
index 0000000..f6bac89
--- /dev/null
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2011 Kionix, Inc.
+ * Written by Chris Hudson <chudson@kionix.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307, USA
+ */
+
+#ifndef __KXTJ9_H__
+#define __KXTJ9_H__
+
+#define KXTJ9_I2C_ADDR         0x0F
+
+struct kxtj9_platform_data {
+       unsigned int min_interval;      /* minimum poll interval (in milli-seconds) */
+
+       /*
+        * By default, x is axis 0, y is axis 1, z is axis 2; these can be
+        * changed to account for sensor orientation within the host device.
+        */
+       u8 axis_map_x;
+       u8 axis_map_y;
+       u8 axis_map_z;
+
+       /*
+        * Each axis can be negated to account for sensor orientation within
+        * the host device.
+        */
+       bool negate_x;
+       bool negate_y;
+       bool negate_z;
+
+       /* CTRL_REG1: set resolution, g-range, data ready enable */
+       /* Output resolution: 8-bit valid or 12-bit valid */
+       #define RES_8BIT                0
+       #define RES_12BIT               (1 << 6)
+       u8 res_12bit;
+       /* Output g-range: +/-2g, 4g, or 8g */
+       #define KXTJ9_G_2G              0
+       #define KXTJ9_G_4G              (1 << 3)
+       #define KXTJ9_G_8G              (1 << 4)
+       u8 g_range;
+
+       /* DATA_CTRL_REG: controls the output data rate of the part */
+       #define ODR12_5F                0
+       #define ODR25F                  1
+       #define ODR50F                  2
+       #define ODR100F                 3
+       #define ODR200F                 4
+       #define ODR400F                 5
+       #define ODR800F                 6
+       u8 data_odr_init;
+
+       int (*init)(void);
+       void (*exit)(void);
+       int (*power_on)(void);
+       int (*power_off)(void);
+};
+#endif  /* __KXTJ9_H__ */
index 9a43ad7..46ac9a5 100644 (file)
 
 #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
 #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+#define DIV_ROUND_UP_ULL(ll,d) \
+       ({ unsigned long long _tmp = (ll)+(d)-1; do_div(_tmp, d); _tmp; })
+
+#if BITS_PER_LONG == 32
+# define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP_ULL(ll, d)
+#else
+# define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP(ll,d)
+#endif
 
 /* The `const' in roundup() prevents gcc-3.3 from calling __divdi3 */
 #define roundup(x, y) (                                        \
index 2ed0b6c..ddee79b 100644 (file)
@@ -1132,7 +1132,7 @@ struct net_device {
        spinlock_t              addr_list_lock;
        struct netdev_hw_addr_list      uc;     /* Unicast mac addresses */
        struct netdev_hw_addr_list      mc;     /* Multicast mac addresses */
-       int                     uc_promisc;
+       bool                    uc_promisc;
        unsigned int            promiscuity;
        unsigned int            allmulti;
 
@@ -1679,9 +1679,12 @@ static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen)
 static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
                                        unsigned int offset)
 {
+       if (!pskb_may_pull(skb, hlen))
+               return NULL;
+
        NAPI_GRO_CB(skb)->frag0 = NULL;
        NAPI_GRO_CB(skb)->frag0_len = 0;
-       return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
+       return skb->data + offset;
 }
 
 static inline void *skb_gro_mac_header(struct sk_buff *skb)
index 504b289..a3c4bc8 100644 (file)
@@ -563,6 +563,9 @@ enum {
        NFSPROC4_CLNT_GETDEVICEINFO,
        NFSPROC4_CLNT_LAYOUTCOMMIT,
        NFSPROC4_CLNT_LAYOUTRETURN,
+       NFSPROC4_CLNT_SECINFO_NO_NAME,
+       NFSPROC4_CLNT_TEST_STATEID,
+       NFSPROC4_CLNT_FREE_STATEID,
 };
 
 /* nfs41 types */
index 08c444a..50a661f 100644 (file)
@@ -16,6 +16,7 @@ struct nfs4_sequence_args;
 struct nfs4_sequence_res;
 struct nfs_server;
 struct nfs4_minor_version_ops;
+struct server_scope;
 
 /*
  * The nfs_client identifies our client state to the server.
@@ -77,12 +78,13 @@ struct nfs_client {
        /* The flags used for obtaining the clientid during EXCHANGE_ID */
        u32                     cl_exchange_flags;
        struct nfs4_session     *cl_session;    /* sharred session */
-       struct list_head        cl_layouts;
 #endif /* CONFIG_NFS_V4 */
 
 #ifdef CONFIG_NFS_FSCACHE
        struct fscache_cookie   *fscache;       /* client index cache cookie */
 #endif
+
+       struct server_scope     *server_scope;  /* from exchange_id */
 };
 
 /*
@@ -149,6 +151,7 @@ struct nfs_server {
        struct rb_root          openowner_id;
        struct rb_root          lockowner_id;
 #endif
+       struct list_head        layouts;
        struct list_head        delegations;
        void (*destroy)(struct nfs_server *);
 
index 25311b3..e2791a2 100644 (file)
@@ -55,20 +55,28 @@ struct nfs_page {
        struct nfs_writeverf    wb_verf;        /* Commit cookie */
 };
 
+struct nfs_pageio_descriptor;
+struct nfs_pageio_ops {
+       void    (*pg_init)(struct nfs_pageio_descriptor *, struct nfs_page *);
+       bool    (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
+       int     (*pg_doio)(struct nfs_pageio_descriptor *);
+};
+
 struct nfs_pageio_descriptor {
        struct list_head        pg_list;
        unsigned long           pg_bytes_written;
        size_t                  pg_count;
        size_t                  pg_bsize;
        unsigned int            pg_base;
-       char                    pg_moreio;
+       unsigned char           pg_moreio : 1,
+                               pg_recoalesce : 1;
 
        struct inode            *pg_inode;
-       int                     (*pg_doio)(struct nfs_pageio_descriptor *);
+       const struct nfs_pageio_ops *pg_ops;
        int                     pg_ioflags;
        int                     pg_error;
+       const struct rpc_call_ops *pg_rpc_callops;
        struct pnfs_layout_segment *pg_lseg;
-       bool                    (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
 };
 
 #define NFS_WBACK_BUSY(req)    (test_bit(PG_BUSY,&(req)->wb_flags))
@@ -85,7 +93,7 @@ extern        int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst,
                          pgoff_t idx_start, unsigned int npages, int tag);
 extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
                             struct inode *inode,
-                            int (*doio)(struct nfs_pageio_descriptor *desc),
+                            const struct nfs_pageio_ops *pg_ops,
                             size_t bsize,
                             int how);
 extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
@@ -100,7 +108,6 @@ extern      void nfs_unlock_request(struct nfs_page *req);
 extern int nfs_set_page_tag_locked(struct nfs_page *req);
 extern  void nfs_clear_page_tag_locked(struct nfs_page *req);
 
-
 /*
  * Lock the page of an asynchronous request without getting a new reference
  */
index 00848d8..5b11595 100644 (file)
@@ -269,9 +269,10 @@ struct nfs4_layoutcommit_data {
 };
 
 struct nfs4_layoutreturn_args {
-       __u32   layout_type;
+       struct pnfs_layout_hdr *layout;
        struct inode *inode;
        nfs4_stateid stateid;
+       __u32   layout_type;
        struct nfs4_sequence_args seq_args;
 };
 
@@ -1060,6 +1061,7 @@ struct server_scope {
 struct nfs41_exchange_id_res {
        struct nfs_client               *client;
        u32                             flags;
+       struct server_scope             *server_scope;
 };
 
 struct nfs41_create_session_args {
@@ -1083,6 +1085,34 @@ struct nfs41_reclaim_complete_args {
 struct nfs41_reclaim_complete_res {
        struct nfs4_sequence_res        seq_res;
 };
+
+#define SECINFO_STYLE_CURRENT_FH 0
+#define SECINFO_STYLE_PARENT 1
+struct nfs41_secinfo_no_name_args {
+       int                             style;
+       struct nfs4_sequence_args       seq_args;
+};
+
+struct nfs41_test_stateid_args {
+       nfs4_stateid                    *stateid;
+       struct nfs4_sequence_args       seq_args;
+};
+
+struct nfs41_test_stateid_res {
+       unsigned int                    status;
+       struct nfs4_sequence_res        seq_res;
+};
+
+struct nfs41_free_stateid_args {
+       nfs4_stateid                    *stateid;
+       struct nfs4_sequence_args       seq_args;
+};
+
+struct nfs41_free_stateid_res {
+       unsigned int                    status;
+       struct nfs4_sequence_res        seq_res;
+};
+
 #endif /* CONFIG_NFS_V4_1 */
 
 struct nfs_page;
@@ -1096,6 +1126,7 @@ struct nfs_read_data {
        struct rpc_cred         *cred;
        struct nfs_fattr        fattr;  /* fattr storage */
        struct list_head        pages;  /* Coalesced read requests */
+       struct list_head        list;   /* lists of struct nfs_read_data */
        struct nfs_page         *req;   /* multi ops per nfs_page */
        struct page             **pagevec;
        unsigned int            npages; /* Max length of pagevec */
@@ -1119,6 +1150,7 @@ struct nfs_write_data {
        struct nfs_fattr        fattr;
        struct nfs_writeverf    verf;
        struct list_head        pages;          /* Coalesced requests we wish to flush */
+       struct list_head        list;           /* lists of struct nfs_write_data */
        struct nfs_page         *req;           /* multi ops per nfs_page */
        struct page             **pagevec;
        unsigned int            npages;         /* Max length of pagevec */
index 76efbdd..435dd5f 100644 (file)
@@ -41,9 +41,6 @@
 
 #include <linux/nfs_fs.h>
 #include <linux/nfs_page.h>
-#include <scsi/osd_protocol.h>
-
-#define PNFS_OSD_OSDNAME_MAXSIZE 256
 
 /*
  * draft-ietf-nfsv4-minorversion-22
@@ -99,12 +96,6 @@ struct pnfs_osd_objid {
 #define _DEVID_HI(oid_device_id) \
        (unsigned long long)be64_to_cpup(((__be64 *)(oid_device_id)->data) + 1)
 
-static inline int
-pnfs_osd_objid_xdr_sz(void)
-{
-       return (NFS4_DEVICEID4_SIZE / 4) + 2 + 2;
-}
-
 enum pnfs_osd_version {
        PNFS_OSD_MISSING              = 0,
        PNFS_OSD_VERSION_1            = 1,
@@ -189,8 +180,6 @@ struct pnfs_osd_targetid {
        struct nfs4_string              oti_scsi_device_id;
 };
 
-enum { PNFS_OSD_TARGETID_MAX = 1 + PNFS_OSD_OSDNAME_MAXSIZE / 4 };
-
 /*   struct netaddr4 {
  *       // see struct rpcb in RFC1833
  *       string r_netid<>;    // network id
@@ -207,12 +196,6 @@ struct pnfs_osd_targetaddr {
        struct pnfs_osd_net_addr        ota_netaddr;
 };
 
-enum {
-       NETWORK_ID_MAX = 16 / 4,
-       UNIVERSAL_ADDRESS_MAX = 64 / 4,
-       PNFS_OSD_TARGETADDR_MAX = 3 +  NETWORK_ID_MAX + UNIVERSAL_ADDRESS_MAX,
-};
-
 struct pnfs_osd_deviceaddr {
        struct pnfs_osd_targetid        oda_targetid;
        struct pnfs_osd_targetaddr      oda_targetaddr;
@@ -222,15 +205,6 @@ struct pnfs_osd_deviceaddr {
        struct nfs4_string              oda_osdname;
 };
 
-enum {
-       ODA_OSDNAME_MAX = PNFS_OSD_OSDNAME_MAXSIZE / 4,
-       PNFS_OSD_DEVICEADDR_MAX =
-               PNFS_OSD_TARGETID_MAX + PNFS_OSD_TARGETADDR_MAX +
-               2 /*oda_lun*/ +
-               1 + OSD_SYSTEMID_LEN +
-               1 + ODA_OSDNAME_MAX,
-};
-
 /* LAYOUTCOMMIT: layoutupdate */
 
 /*   union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) {
@@ -279,7 +253,7 @@ struct pnfs_osd_ioerr {
        u32                     oer_errno;
 };
 
-/* OSD XDR API */
+/* OSD XDR Client API */
 /* Layout helpers */
 /* Layout decoding is done in two parts:
  * 1. First Call pnfs_osd_xdr_decode_layout_map to read in only the header part
@@ -337,8 +311,7 @@ extern int
 pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
                                 struct pnfs_osd_layoutupdate *lou);
 
-/* osd_ioerror encoding/decoding (layout_return) */
-/* Client */
+/* osd_ioerror encoding (layout_return) */
 extern __be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr);
 extern void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr);
 
index 650af6d..643b96c 100644 (file)
@@ -50,8 +50,6 @@ typedef       int (write_proc_t)(struct file *file, const char __user *buffer,
 
 struct proc_dir_entry {
        unsigned int low_ino;
-       unsigned int namelen;
-       const char *name;
        mode_t mode;
        nlink_t nlink;
        uid_t uid;
@@ -73,9 +71,11 @@ struct proc_dir_entry {
        write_proc_t *write_proc;
        atomic_t count;         /* use count */
        int pde_users;  /* number of callers into module in progress */
-       spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
        struct completion *pde_unload_completion;
        struct list_head pde_openers;   /* who did ->open, but not ->release */
+       spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
+       u8 namelen;
+       char name[];
 };
 
 enum kcore_type {
index 75cbf4f..9e65d9e 100644 (file)
@@ -245,10 +245,16 @@ struct mdp_superblock_1 {
        __u8    device_uuid[16]; /* user-space setable, ignored by kernel */
        __u8    devflags;       /* per-device flags.  Only one defined...*/
 #define        WriteMostly1    1       /* mask for writemostly flag in above */
-       __u8    pad2[64-57];    /* set to 0 when writing */
+       /* Bad block log.  If there are any bad blocks the feature flag is set.
+        * If offset and size are non-zero, that space is reserved and available
+        */
+       __u8    bblog_shift;    /* shift from sectors to block size */
+       __le16  bblog_size;     /* number of sectors reserved for list */
+       __le32  bblog_offset;   /* sector offset from superblock to bblog,
+                                * signed - not unsigned */
 
        /* array state information - 64 bytes */
-       __le64  utime;          /* 40 bits second, 24 btes microseconds */
+       __le64  utime;          /* 40 bits second, 24 bits microseconds */
        __le64  events;         /* incremented when superblock updated */
        __le64  resync_offset;  /* data before this offset (from data_offset) known to be in sync */
        __le32  sb_csum;        /* checksum up to devs[max_dev] */
@@ -270,8 +276,8 @@ struct mdp_superblock_1 {
                                           * must be honoured
                                           */
 #define        MD_FEATURE_RESHAPE_ACTIVE       4
+#define        MD_FEATURE_BAD_BLOCKS           8 /* badblock list is not empty */
 
-#define        MD_FEATURE_ALL                  (1|2|4)
+#define        MD_FEATURE_ALL                  (1|2|4|8)
 
 #endif 
-
index 0828842..f7f3ce3 100644 (file)
@@ -31,7 +31,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <linux/sunrpc/xprt.h>
 #include <linux/sunrpc/sched.h>
 
-#ifdef CONFIG_NFS_V4_1
+#ifdef CONFIG_SUNRPC_BACKCHANNEL
 struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt);
 void xprt_free_bc_request(struct rpc_rqst *req);
 int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs);
@@ -47,7 +47,7 @@ static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
                return 1;
        return 0;
 }
-#else /* CONFIG_NFS_V4_1 */
+#else /* CONFIG_SUNRPC_BACKCHANNEL */
 static inline int xprt_setup_backchannel(struct rpc_xprt *xprt,
                                         unsigned int min_reqs)
 {
@@ -62,6 +62,6 @@ static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
 static inline void xprt_free_bc_request(struct rpc_rqst *req)
 {
 }
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 #endif /* _LINUX_SUNRPC_BC_XPRT_H */
 
index fe2d8e6..e775689 100644 (file)
@@ -227,6 +227,10 @@ void               rpc_init_wait_queue(struct rpc_wait_queue *, const char *);
 void           rpc_destroy_wait_queue(struct rpc_wait_queue *);
 void           rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *,
                                        rpc_action action);
+void           rpc_sleep_on_priority(struct rpc_wait_queue *,
+                                       struct rpc_task *,
+                                       rpc_action action,
+                                       int priority);
 void           rpc_wake_up_queued_task(struct rpc_wait_queue *,
                                        struct rpc_task *);
 void           rpc_wake_up(struct rpc_wait_queue *);
index 2f1e518..223588a 100644 (file)
@@ -92,7 +92,7 @@ struct svc_serv {
        struct module *         sv_module;      /* optional module to count when
                                                 * adding threads */
        svc_thread_fn           sv_function;    /* main function for threads */
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
        struct list_head        sv_cb_list;     /* queue for callback requests
                                                 * that arrive over the same
                                                 * connection */
@@ -100,7 +100,7 @@ struct svc_serv {
        wait_queue_head_t       sv_cb_waitq;    /* sleep here if there are no
                                                 * entries in the svc_cb_list */
        struct svc_xprt         *sv_bc_xprt;    /* callback on fore channel */
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 };
 
 /*
index 81cce3b..15518a1 100644 (file)
@@ -22,6 +22,7 @@
 #define RPC_MIN_SLOT_TABLE     (2U)
 #define RPC_DEF_SLOT_TABLE     (16U)
 #define RPC_MAX_SLOT_TABLE     (128U)
+#define RPC_MAX_SLOT_TABLE_LIMIT       (65536U)
 
 /*
  * This describes a timeout strategy
@@ -100,18 +101,18 @@ struct rpc_rqst {
        ktime_t                 rq_xtime;       /* transmit time stamp */
        int                     rq_ntrans;
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
        struct list_head        rq_bc_list;     /* Callback service list */
        unsigned long           rq_bc_pa_state; /* Backchannel prealloc state */
        struct list_head        rq_bc_pa_list;  /* Backchannel prealloc list */
-#endif /* CONFIG_NFS_V4_1 */
-#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 };
 #define rq_svec                        rq_snd_buf.head
 #define rq_slen                        rq_snd_buf.len
 
 struct rpc_xprt_ops {
        void            (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize);
-       int             (*reserve_xprt)(struct rpc_task *task);
+       int             (*reserve_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
        void            (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
        void            (*rpcbind)(struct rpc_task *task);
        void            (*set_port)(struct rpc_xprt *xprt, unsigned short port);
@@ -164,12 +165,12 @@ struct rpc_xprt {
 
        struct rpc_wait_queue   binding;        /* requests waiting on rpcbind */
        struct rpc_wait_queue   sending;        /* requests waiting to send */
-       struct rpc_wait_queue   resend;         /* requests waiting to resend */
        struct rpc_wait_queue   pending;        /* requests in flight */
        struct rpc_wait_queue   backlog;        /* waiting for slot */
        struct list_head        free;           /* free slots */
-       struct rpc_rqst *       slot;           /* slot table storage */
-       unsigned int            max_reqs;       /* total slots */
+       unsigned int            max_reqs;       /* max number of slots */
+       unsigned int            min_reqs;       /* min number of slots */
+       atomic_t                num_reqs;       /* total slots */
        unsigned long           state;          /* transport state */
        unsigned char           shutdown   : 1, /* being shut down */
                                resvport   : 1; /* use a reserved port */
@@ -200,7 +201,7 @@ struct rpc_xprt {
        u32                     xid;            /* Next XID value to use */
        struct rpc_task *       snd_task;       /* Task blocked in send */
        struct svc_xprt         *bc_xprt;       /* NFSv4.1 backchannel */
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
        struct svc_serv         *bc_serv;       /* The RPC service which will */
                                                /* process the callback */
        unsigned int            bc_alloc_count; /* Total number of preallocs */
@@ -208,7 +209,7 @@ struct rpc_xprt {
                                                 * items */
        struct list_head        bc_pa_list;     /* List of preallocated
                                                 * backchannel rpc_rqst's */
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
        struct list_head        recv;
 
        struct {
@@ -228,15 +229,15 @@ struct rpc_xprt {
        const char              *address_strings[RPC_DISPLAY_MAX];
 };
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 /*
  * Backchannel flags
  */
 #define        RPC_BC_PA_IN_USE        0x0001          /* Preallocated backchannel */
                                                /* buffer in use */
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 static inline int bc_prealloc(struct rpc_rqst *req)
 {
        return test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
@@ -246,7 +247,7 @@ static inline int bc_prealloc(struct rpc_rqst *req)
 {
        return 0;
 }
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
 struct xprt_create {
        int                     ident;          /* XPRT_TRANSPORT identifier */
@@ -271,8 +272,8 @@ struct xprt_class {
 struct rpc_xprt                *xprt_create_transport(struct xprt_create *args);
 void                   xprt_connect(struct rpc_task *task);
 void                   xprt_reserve(struct rpc_task *task);
-int                    xprt_reserve_xprt(struct rpc_task *task);
-int                    xprt_reserve_xprt_cong(struct rpc_task *task);
+int                    xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
+int                    xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
 int                    xprt_prepare_transmit(struct rpc_task *task);
 void                   xprt_transmit(struct rpc_task *task);
 void                   xprt_end_transmit(struct rpc_task *task);
@@ -282,7 +283,9 @@ void                        xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
 void                   xprt_release(struct rpc_task *task);
 struct rpc_xprt *      xprt_get(struct rpc_xprt *xprt);
 void                   xprt_put(struct rpc_xprt *xprt);
-struct rpc_xprt *      xprt_alloc(struct net *net, int size, int max_req);
+struct rpc_xprt *      xprt_alloc(struct net *net, size_t size,
+                               unsigned int num_prealloc,
+                               unsigned int max_req);
 void                   xprt_free(struct rpc_xprt *);
 
 static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p)
@@ -321,7 +324,6 @@ void                        xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
 #define XPRT_CLOSING           (6)
 #define XPRT_CONNECTION_ABORT  (7)
 #define XPRT_CONNECTION_CLOSE  (8)
-#define XPRT_INITIALIZED       (9)
 
 static inline void xprt_set_connected(struct rpc_xprt *xprt)
 {
index 011bcfe..111843f 100644 (file)
@@ -59,6 +59,84 @@ struct watchdog_info {
 #define WATCHDOG_NOWAYOUT      0
 #endif
 
+struct watchdog_ops;
+struct watchdog_device;
+
+/** struct watchdog_ops - The watchdog-devices operations
+ *
+ * @owner:     The module owner.
+ * @start:     The routine for starting the watchdog device.
+ * @stop:      The routine for stopping the watchdog device.
+ * @ping:      The routine that sends a keepalive ping to the watchdog device.
+ * @status:    The routine that shows the status of the watchdog device.
+ * @set_timeout:The routine for setting the watchdog devices timeout value.
+ * @ioctl:     The routines that handles extra ioctl calls.
+ *
+ * The watchdog_ops structure contains a list of low-level operations
+ * that control a watchdog device. It also contains the module that owns
+ * these operations. The start and stop functions are mandatory, all other
+ * functions are optional.
+ */
+struct watchdog_ops {
+       struct module *owner;
+       /* mandatory operations */
+       int (*start)(struct watchdog_device *);
+       int (*stop)(struct watchdog_device *);
+       /* optional operations */
+       int (*ping)(struct watchdog_device *);
+       unsigned int (*status)(struct watchdog_device *);
+       int (*set_timeout)(struct watchdog_device *, unsigned int);
+       long (*ioctl)(struct watchdog_device *, unsigned int, unsigned long);
+};
+
+/** struct watchdog_device - The structure that defines a watchdog device
+ *
+ * @info:      Pointer to a watchdog_info structure.
+ * @ops:       Pointer to the list of watchdog operations.
+ * @bootstatus:        Status of the watchdog device at boot.
+ * @timeout:   The watchdog devices timeout value.
+ * @min_timeout:The watchdog devices minimum timeout value.
+ * @max_timeout:The watchdog devices maximum timeout value.
+ * @driver_data: Pointer to the driver's private data.
+ * @status:    Field that contains the devices internal status bits.
+ *
+ * The watchdog_device structure contains all information about a
+ * watchdog timer device.
+ *
+ * The driver-data field may not be accessed directly. It must be accessed
+ * via the watchdog_set_drvdata and watchdog_get_drvdata helpers.
+ */
+struct watchdog_device {
+       const struct watchdog_info *info;
+       const struct watchdog_ops *ops;
+       unsigned int bootstatus;
+       unsigned int timeout;
+       unsigned int min_timeout;
+       unsigned int max_timeout;
+       void *driver_data;
+       unsigned long status;
+/* Bit numbers for status flags */
+#define WDOG_ACTIVE            0       /* Is the watchdog running/active */
+#define WDOG_DEV_OPEN          1       /* Opened via /dev/watchdog ? */
+#define WDOG_ALLOW_RELEASE     2       /* Did we receive the magic char ? */
+#define WDOG_NO_WAY_OUT                3       /* Is 'nowayout' feature set ? */
+};
+
+/* Use the following functions to manipulate watchdog driver specific data */
+static inline void watchdog_set_drvdata(struct watchdog_device *wdd, void *data)
+{
+       wdd->driver_data = data;
+}
+
+static inline void *watchdog_get_drvdata(struct watchdog_device *wdd)
+{
+       return wdd->driver_data;
+}
+
+/* drivers/watchdog/core/watchdog_core.c */
+extern int watchdog_register_device(struct watchdog_device *);
+extern void watchdog_unregister_device(struct watchdog_device *);
+
 #endif /* __KERNEL__ */
 
 #endif  /* ifndef _LINUX_WATCHDOG_H */
index 38e8c4d..fd98bb9 100644 (file)
 #define WM97XX_ADCSEL_X                0x1000  /* x coord measurement */
 #define WM97XX_ADCSEL_Y                0x2000  /* y coord measurement */
 #define WM97XX_ADCSEL_PRES     0x3000  /* pressure measurement */
-#define WM97XX_ADCSEL_MASK     0x7000
+#define WM97XX_AUX_ID1         0x4000
+#define WM97XX_AUX_ID2         0x5000
+#define WM97XX_AUX_ID3         0x6000
+#define WM97XX_AUX_ID4         0x7000
+#define WM97XX_ADCSEL_MASK     0x7000  /* ADC selection mask */
 #define WM97XX_COO             0x0800  /* enable coordinate mode */
 #define WM97XX_CTC             0x0400  /* enable continuous mode */
 #define WM97XX_CM_RATE_93      0x0000  /* 93.75Hz continuous rate */
 #define WM97XX_PRP_DET_DIG     0xc000  /* setect on, digitise on */
 #define WM97XX_RPR             0x2000  /* wake up on pen down */
 #define WM97XX_PEN_DOWN                0x8000  /* pen is down */
-#define WM97XX_ADCSRC_MASK     0x7000  /* ADC source mask */
-
-#define WM97XX_AUX_ID1         0x8001
-#define WM97XX_AUX_ID2         0x8002
-#define WM97XX_AUX_ID3         0x8003
-#define WM97XX_AUX_ID4         0x8004
-
 
 /* WM9712 Bits */
 #define WM9712_45W             0x1000  /* set for 5-wire touchscreen */
index ea68b3c..988ba06 100644 (file)
 /* default iSCSI listen port for incoming connections */
 #define ISCSI_LISTEN_PORT      3260
 
+/* iSCSI header length */
+#define ISCSI_HDR_LEN          48
+
+/* iSCSI CRC32C length */
+#define ISCSI_CRC_LEN          4
+
 /* Padding word length */
 #define ISCSI_PAD_LEN          4
 
 /*
+ * Serial Number Arithmetic, 32 bits, RFC1982
+ */
+
+static inline int iscsi_sna_lt(u32 n1, u32 n2)
+{
+       return (s32)(n1 - n2) < 0;
+}
+
+static inline int iscsi_sna_lte(u32 n1, u32 n2)
+{
+       return (s32)(n1 - n2) <= 0;
+}
+
+static inline int iscsi_sna_gt(u32 n1, u32 n2)
+{
+       return (s32)(n1 - n2) > 0;
+}
+
+static inline int iscsi_sna_gte(u32 n1, u32 n2)
+{
+       return (s32)(n1 - n2) >= 0;
+}
+
+/*
  * useful common(control and data pathes) macro
  */
 #define ntoh24(p) (((p)[0] << 16) | ((p)[1] << 8) | ((p)[2]))
@@ -116,7 +146,7 @@ struct iscsi_ahs_hdr {
 #define ISCSI_CDB_SIZE                 16
 
 /* iSCSI PDU Header */
-struct iscsi_cmd {
+struct iscsi_scsi_req {
        uint8_t opcode;
        uint8_t flags;
        __be16 rsvd2;
@@ -161,7 +191,7 @@ struct iscsi_ecdb_ahdr {
 };
 
 /* SCSI Response Header */
-struct iscsi_cmd_rsp {
+struct iscsi_scsi_rsp {
        uint8_t opcode;
        uint8_t flags;
        uint8_t response;
@@ -406,7 +436,7 @@ struct iscsi_text_rsp {
 };
 
 /* Login Header */
-struct iscsi_login {
+struct iscsi_login_req {
        uint8_t opcode;
        uint8_t flags;
        uint8_t max_version;    /* Max. version supported */
@@ -427,7 +457,13 @@ struct iscsi_login {
 #define ISCSI_FLAG_LOGIN_TRANSIT               0x80
 #define ISCSI_FLAG_LOGIN_CONTINUE              0x40
 #define ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK    0x0C    /* 2 bits */
+#define ISCSI_FLAG_LOGIN_CURRENT_STAGE1                0x04
+#define ISCSI_FLAG_LOGIN_CURRENT_STAGE2                0x08
+#define ISCSI_FLAG_LOGIN_CURRENT_STAGE3                0x0C
 #define ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK       0x03    /* 2 bits */
+#define ISCSI_FLAG_LOGIN_NEXT_STAGE1           0x01
+#define ISCSI_FLAG_LOGIN_NEXT_STAGE2           0x02
+#define ISCSI_FLAG_LOGIN_NEXT_STAGE3           0x03
 
 #define ISCSI_LOGIN_CURRENT_STAGE(flags) \
        ((flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2)
@@ -550,17 +586,25 @@ struct iscsi_logout_rsp {
 struct iscsi_snack {
        uint8_t opcode;
        uint8_t flags;
-       uint8_t rsvd2[14];
+       uint8_t rsvd2[2];
+       uint8_t hlength;
+       uint8_t dlength[3];
+       uint8_t lun[8];
        itt_t    itt;
+       __be32  ttt;
+       uint8_t rsvd3[4];
+       __be32  exp_statsn;
+       uint8_t rsvd4[8];
        __be32  begrun;
        __be32  runlength;
-       __be32  exp_statsn;
-       __be32  rsvd3;
-       __be32  exp_datasn;
-       uint8_t rsvd6[8];
 };
 
 /* SNACK PDU flags */
+#define ISCSI_FLAG_SNACK_TYPE_DATA             0
+#define ISCSI_FLAG_SNACK_TYPE_R2T              0
+#define ISCSI_FLAG_SNACK_TYPE_STATUS           1
+#define ISCSI_FLAG_SNACK_TYPE_DATA_ACK         2
+#define ISCSI_FLAG_SNACK_TYPE_RDATA            3
 #define ISCSI_FLAG_SNACK_TYPE_MASK     0x0F    /* 4 bits */
 
 /* Reject Message Header */
index e1bad11..57e71fa 100644 (file)
@@ -507,6 +507,18 @@ void snd_pcm_detach_substream(struct snd_pcm_substream *substream);
 void snd_pcm_vma_notify_data(void *client, void *data);
 int snd_pcm_mmap_data(struct snd_pcm_substream *substream, struct file *file, struct vm_area_struct *area);
 
+
+#ifdef CONFIG_SND_DEBUG
+void snd_pcm_debug_name(struct snd_pcm_substream *substream,
+                          char *name, size_t len);
+#else
+static inline void
+snd_pcm_debug_name(struct snd_pcm_substream *substream, char *buf, size_t size)
+{
+       *buf = 0;
+}
+#endif
+
 /*
  *  PCM library
  */
@@ -749,17 +761,18 @@ static inline const struct snd_interval *hw_param_interval_c(const struct snd_pc
        return &params->intervals[var - SNDRV_PCM_HW_PARAM_FIRST_INTERVAL];
 }
 
-#define params_access(p) ((__force snd_pcm_access_t)snd_mask_min(hw_param_mask((p), SNDRV_PCM_HW_PARAM_ACCESS)))
-#define params_format(p) ((__force snd_pcm_format_t)snd_mask_min(hw_param_mask((p), SNDRV_PCM_HW_PARAM_FORMAT)))
-#define params_subformat(p) snd_mask_min(hw_param_mask((p), SNDRV_PCM_HW_PARAM_SUBFORMAT))
-#define params_channels(p) hw_param_interval((p), SNDRV_PCM_HW_PARAM_CHANNELS)->min
-#define params_rate(p) hw_param_interval((p), SNDRV_PCM_HW_PARAM_RATE)->min
-#define params_period_size(p) hw_param_interval((p), SNDRV_PCM_HW_PARAM_PERIOD_SIZE)->min
-#define params_period_bytes(p) ((params_period_size(p)*snd_pcm_format_physical_width(params_format(p))*params_channels(p))/8)
-#define params_periods(p) hw_param_interval((p), SNDRV_PCM_HW_PARAM_PERIODS)->min
-#define params_buffer_size(p) hw_param_interval((p), SNDRV_PCM_HW_PARAM_BUFFER_SIZE)->min
-#define params_buffer_bytes(p) hw_param_interval((p), SNDRV_PCM_HW_PARAM_BUFFER_BYTES)->min
-
+#define params_channels(p) \
+       (hw_param_interval_c((p), SNDRV_PCM_HW_PARAM_CHANNELS)->min)
+#define params_rate(p) \
+       (hw_param_interval_c((p), SNDRV_PCM_HW_PARAM_RATE)->min)
+#define params_period_size(p) \
+       (hw_param_interval_c((p), SNDRV_PCM_HW_PARAM_PERIOD_SIZE)->min)
+#define params_periods(p) \
+       (hw_param_interval_c((p), SNDRV_PCM_HW_PARAM_PERIODS)->min)
+#define params_buffer_size(p) \
+       (hw_param_interval_c((p), SNDRV_PCM_HW_PARAM_BUFFER_SIZE)->min)
+#define params_buffer_bytes(p) \
+       (hw_param_interval_c((p), SNDRV_PCM_HW_PARAM_BUFFER_BYTES)->min)
 
 int snd_interval_refine(struct snd_interval *i, const struct snd_interval *v);
 void snd_interval_mul(const struct snd_interval *a, const struct snd_interval *b, struct snd_interval *c);
index 85cf1cf..f494f1e 100644 (file)
@@ -337,5 +337,19 @@ static inline unsigned int sub(unsigned int a, unsigned int b)
        return 0;
 }
 
-#endif /* __SOUND_PCM_PARAMS_H */
+#define params_access(p) ((__force snd_pcm_access_t)\
+               snd_mask_min(hw_param_mask_c((p), SNDRV_PCM_HW_PARAM_ACCESS)))
+#define params_format(p) ((__force snd_pcm_format_t)\
+               snd_mask_min(hw_param_mask_c((p), SNDRV_PCM_HW_PARAM_FORMAT)))
+#define params_subformat(p) \
+       snd_mask_min(hw_param_mask_c((p), SNDRV_PCM_HW_PARAM_SUBFORMAT))
 
+static inline unsigned int
+params_period_bytes(const struct snd_pcm_hw_params *p)
+{
+       return (params_period_size(p) *
+               snd_pcm_format_physical_width(params_format(p)) *
+               params_channels(p)) / 8;
+}
+
+#endif /* __SOUND_PCM_PARAMS_H */
index e09505c..e0583b7 100644 (file)
        .get = snd_soc_dapm_get_enum_virt, \
        .put = snd_soc_dapm_put_enum_virt, \
        .private_value = (unsigned long)&xenum }
+#define SOC_DAPM_ENUM_EXT(xname, xenum, xget, xput) \
+{      .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \
+       .info = snd_soc_info_enum_double, \
+       .get = xget, \
+       .put = xput, \
+       .private_value = (unsigned long)&xenum }
 #define SOC_DAPM_VALUE_ENUM(xname, xenum) \
 {      .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \
        .info = snd_soc_info_enum_double, \
index 9844580..1d2b6ce 100644 (file)
  */
 
 #include <linux/cgroup.h>
+#include <linux/cred.h>
 #include <linux/ctype.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/init_task.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/mm.h>
@@ -1514,6 +1516,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
                struct cgroup *root_cgrp = &root->top_cgroup;
                struct inode *inode;
                struct cgroupfs_root *existing_root;
+               const struct cred *cred;
                int i;
 
                BUG_ON(sb->s_root != NULL);
@@ -1593,7 +1596,9 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
                BUG_ON(!list_empty(&root_cgrp->children));
                BUG_ON(root->number_of_cgroups != 1);
 
+               cred = override_creds(&init_cred);
                cgroup_populate_dir(root_cgrp);
+               revert_creds(cred);
                mutex_unlock(&cgroup_mutex);
                mutex_unlock(&inode->i_mutex);
        } else {
index 18197ae..616c781 100644 (file)
@@ -992,11 +992,8 @@ asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, compat
        sigset_from_compat(&newset, &newset32);
        sigdelsetmask(&newset, sigmask(SIGKILL)|sigmask(SIGSTOP));
 
-       spin_lock_irq(&current->sighand->siglock);
        current->saved_sigmask = current->blocked;
-       current->blocked = newset;
-       recalc_sigpending();
-       spin_unlock_irq(&current->sighand->siglock);
+       set_current_blocked(&newset);
 
        current->state = TASK_INTERRUPTIBLE;
        schedule();
index d7f70ae..291c970 100644 (file)
@@ -3102,15 +3102,11 @@ SYSCALL_DEFINE0(sgetmask)
 
 SYSCALL_DEFINE1(ssetmask, int, newmask)
 {
-       int old;
-
-       spin_lock_irq(&current->sighand->siglock);
-       old = current->blocked.sig[0];
+       int old = current->blocked.sig[0];
+       sigset_t newset;
 
-       siginitset(&current->blocked, newmask & ~(sigmask(SIGKILL)|
-                                                 sigmask(SIGSTOP)));
-       recalc_sigpending();
-       spin_unlock_irq(&current->sighand->siglock);
+       siginitset(&newset, newmask & ~(sigmask(SIGKILL) | sigmask(SIGSTOP)));
+       set_current_blocked(&newset);
 
        return old;
 }
@@ -3167,11 +3163,8 @@ SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize)
                return -EFAULT;
        sigdelsetmask(&newset, sigmask(SIGKILL)|sigmask(SIGSTOP));
 
-       spin_lock_irq(&current->sighand->siglock);
        current->saved_sigmask = current->blocked;
-       current->blocked = newset;
-       recalc_sigpending();
-       spin_unlock_irq(&current->sighand->siglock);
+       set_current_blocked(&newset);
 
        current->state = TASK_INTERRUPTIBLE;
        schedule();
index 934e221..9d40a07 100644 (file)
@@ -695,7 +695,7 @@ void vlan_setup(struct net_device *dev)
        ether_setup(dev);
 
        dev->priv_flags         |= IFF_802_1Q_VLAN;
-       dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
+       dev->priv_flags         &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
        dev->tx_queue_len       = 0;
 
        dev->netdev_ops         = &vlan_netdev_ops;
index 8c100c9..d4f5dff 100644 (file)
@@ -231,6 +231,7 @@ void bnep_net_setup(struct net_device *dev)
        dev->addr_len = ETH_ALEN;
 
        ether_setup(dev);
+       dev->priv_flags &= ~IFF_TX_SKB_SHARING;
        dev->netdev_ops = &bnep_netdev_ops;
 
        dev->watchdog_timeo  = HZ * 2;
index 9444c5c..17d67b5 100644 (file)
@@ -4497,10 +4497,10 @@ void __dev_set_rx_mode(struct net_device *dev)
                 */
                if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
                        __dev_set_promiscuity(dev, 1);
-                       dev->uc_promisc = 1;
+                       dev->uc_promisc = true;
                } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
                        __dev_set_promiscuity(dev, -1);
-                       dev->uc_promisc = 0;
+                       dev->uc_promisc = false;
                }
 
                if (ops->ndo_set_multicast_list)
index f76079c..e35a6fb 100644 (file)
@@ -1070,7 +1070,9 @@ static ssize_t pktgen_if_write(struct file *file,
                len = num_arg(&user_buffer[i], 10, &value);
                if (len < 0)
                        return len;
-
+               if ((value > 0) &&
+                   (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))
+                       return -ENOTSUPP;
                i += len;
                pkt_dev->clone_skb = value;
 
@@ -3555,7 +3557,6 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
        pkt_dev->min_pkt_size = ETH_ZLEN;
        pkt_dev->max_pkt_size = ETH_ZLEN;
        pkt_dev->nfrags = 0;
-       pkt_dev->clone_skb = pg_clone_skb_d;
        pkt_dev->delay = pg_delay_d;
        pkt_dev->count = pg_count_d;
        pkt_dev->sofar = 0;
@@ -3563,7 +3564,6 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
        pkt_dev->udp_src_max = 9;
        pkt_dev->udp_dst_min = 9;
        pkt_dev->udp_dst_max = 9;
-
        pkt_dev->vlan_p = 0;
        pkt_dev->vlan_cfi = 0;
        pkt_dev->vlan_id = 0xffff;
@@ -3575,6 +3575,8 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
        err = pktgen_setup_dev(pkt_dev, ifname);
        if (err)
                goto out1;
+       if (pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)
+               pkt_dev->clone_skb = pg_clone_skb_d;
 
        pkt_dev->entry = proc_create_data(ifname, 0600, pg_proc_dir,
                                          &pktgen_if_fops, pkt_dev);
index 5cffb63..27997d3 100644 (file)
@@ -231,6 +231,7 @@ EXPORT_SYMBOL(eth_header_parse);
  * eth_header_cache - fill cache entry from neighbour
  * @neigh: source neighbour
  * @hh: destination cache entry
+ * @type: Ethernet type field
  * Create an Ethernet header template from the neighbour.
  */
 int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type)
@@ -339,6 +340,7 @@ void ether_setup(struct net_device *dev)
        dev->addr_len           = ETH_ALEN;
        dev->tx_queue_len       = 1000; /* Ethernet wants good queues */
        dev->flags              = IFF_BROADCAST|IFF_MULTICAST;
+       dev->priv_flags         = IFF_TX_SKB_SHARING;
 
        memset(dev->broadcast, 0xFF, ETH_ALEN);
 
index 37b3c18..bc19bd0 100644 (file)
@@ -1134,15 +1134,15 @@ static void inetdev_send_gratuitous_arp(struct net_device *dev,
                                        struct in_device *in_dev)
 
 {
-       struct in_ifaddr *ifa = in_dev->ifa_list;
-
-       if (!ifa)
-               return;
+       struct in_ifaddr *ifa;
 
-       arp_send(ARPOP_REQUEST, ETH_P_ARP,
-                ifa->ifa_local, dev,
-                ifa->ifa_local, NULL,
-                dev->dev_addr, NULL);
+       for (ifa = in_dev->ifa_list; ifa;
+            ifa = ifa->ifa_next) {
+               arp_send(ARPOP_REQUEST, ETH_P_ARP,
+                        ifa->ifa_local, dev,
+                        ifa->ifa_local, NULL,
+                        dev->dev_addr, NULL);
+       }
 }
 
 /* Called only under RTNL semaphore */
index a06c53c..a55500c 100644 (file)
@@ -1481,6 +1481,8 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
 static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
 {
        struct in6_addr addr;
+       if (ifp->prefix_len == 127) /* RFC 6164 */
+               return;
        ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
        if (ipv6_addr_any(&addr))
                return;
index a8193f5..d2726a7 100644 (file)
@@ -103,7 +103,7 @@ static struct net_device_ops l2tp_eth_netdev_ops = {
 static void l2tp_eth_dev_setup(struct net_device *dev)
 {
        ether_setup(dev);
-
+       dev->priv_flags &= ~IFF_TX_SKB_SHARING;
        dev->netdev_ops         = &l2tp_eth_netdev_ops;
        dev->destructor         = free_netdev;
 }
index cd5fb40..556e7e6 100644 (file)
@@ -698,6 +698,7 @@ static const struct net_device_ops ieee80211_monitorif_ops = {
 static void ieee80211_if_setup(struct net_device *dev)
 {
        ether_setup(dev);
+       dev->priv_flags &= ~IFF_TX_SKB_SHARING;
        dev->netdev_ops = &ieee80211_dataif_ops;
        dev->destructor = free_netdev;
 }
index 02dc82d..b1cbbcd 100644 (file)
@@ -467,7 +467,7 @@ static struct socket *sock_alloc(void)
        struct inode *inode;
        struct socket *sock;
 
-       inode = new_inode(sock_mnt->mnt_sb);
+       inode = new_inode_pseudo(sock_mnt->mnt_sb);
        if (!inode)
                return NULL;
 
@@ -580,7 +580,7 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 }
 EXPORT_SYMBOL(sock_sendmsg);
 
-int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
+static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
 {
        struct kiocb iocb;
        struct sock_iocb siocb;
index b2198e6..ffd243d 100644 (file)
@@ -4,6 +4,10 @@ config SUNRPC
 config SUNRPC_GSS
        tristate
 
+config SUNRPC_BACKCHANNEL
+       bool
+       depends on SUNRPC
+
 config SUNRPC_XPRT_RDMA
        tristate
        depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS && EXPERIMENTAL
index 9d2fca5..8209a04 100644 (file)
@@ -13,6 +13,6 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
            addr.o rpcb_clnt.o timer.o xdr.o \
            sunrpc_syms.o cache.o rpc_pipe.o \
            svc_xprt.o
-sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o
+sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o bc_svc.o
 sunrpc-$(CONFIG_PROC_FS) += stats.o
 sunrpc-$(CONFIG_SYSCTL) += sysctl.o
index cf06af3..91eaa26 100644 (file)
@@ -29,8 +29,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define RPCDBG_FACILITY        RPCDBG_TRANS
 #endif
 
-#if defined(CONFIG_NFS_V4_1)
-
 /*
  * Helper routines that track the number of preallocation elements
  * on the transport.
@@ -174,7 +172,7 @@ out_free:
        dprintk("RPC:       setup backchannel transport failed\n");
        return -1;
 }
-EXPORT_SYMBOL(xprt_setup_backchannel);
+EXPORT_SYMBOL_GPL(xprt_setup_backchannel);
 
 /*
  * Destroys the backchannel preallocated structures.
@@ -204,7 +202,7 @@ void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs)
        dprintk("RPC:        backchannel list empty= %s\n",
                list_empty(&xprt->bc_pa_list) ? "true" : "false");
 }
-EXPORT_SYMBOL(xprt_destroy_backchannel);
+EXPORT_SYMBOL_GPL(xprt_destroy_backchannel);
 
 /*
  * One or more rpc_rqst structure have been preallocated during the
@@ -279,4 +277,3 @@ void xprt_free_bc_request(struct rpc_rqst *req)
        spin_unlock_bh(&xprt->bc_pa_lock);
 }
 
-#endif /* CONFIG_NFS_V4_1 */
index 1dd1a68..0b2eb38 100644 (file)
@@ -27,8 +27,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  * reply over an existing open connection previously established by the client.
  */
 
-#if defined(CONFIG_NFS_V4_1)
-
 #include <linux/module.h>
 
 #include <linux/sunrpc/xprt.h>
@@ -63,4 +61,3 @@ int bc_send(struct rpc_rqst *req)
        return ret;
 }
 
-#endif /* CONFIG_NFS_V4_1 */
index c50818f..c5347d2 100644 (file)
@@ -64,9 +64,9 @@ static void   call_decode(struct rpc_task *task);
 static void    call_bind(struct rpc_task *task);
 static void    call_bind_status(struct rpc_task *task);
 static void    call_transmit(struct rpc_task *task);
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 static void    call_bc_transmit(struct rpc_task *task);
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 static void    call_status(struct rpc_task *task);
 static void    call_transmit_status(struct rpc_task *task);
 static void    call_refresh(struct rpc_task *task);
@@ -715,7 +715,7 @@ rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags,
 }
 EXPORT_SYMBOL_GPL(rpc_call_async);
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 /**
  * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
  * rpc_execute against it
@@ -758,7 +758,7 @@ out:
        dprintk("RPC: rpc_run_bc_task: task= %p\n", task);
        return task;
 }
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
 void
 rpc_call_start(struct rpc_task *task)
@@ -1361,7 +1361,7 @@ call_transmit_status(struct rpc_task *task)
        }
 }
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 /*
  * 5b. Send the backchannel RPC reply.  On error, drop the reply.  In
  * addition, disconnect on connectivity errors.
@@ -1425,7 +1425,7 @@ call_bc_transmit(struct rpc_task *task)
        }
        rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
 }
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
 /*
  * 6.  Sort out the RPC call status
@@ -1550,8 +1550,7 @@ call_decode(struct rpc_task *task)
        kxdrdproc_t     decode = task->tk_msg.rpc_proc->p_decode;
        __be32          *p;
 
-       dprintk("RPC: %5u call_decode (status %d)\n",
-                       task->tk_pid, task->tk_status);
+       dprint_status(task);
 
        if (task->tk_flags & RPC_CALL_MAJORSEEN) {
                if (clnt->cl_chatty)
index 4814e24..d12ffa5 100644 (file)
@@ -97,14 +97,16 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
 /*
  * Add new request to a priority queue.
  */
-static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct rpc_task *task)
+static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
+               struct rpc_task *task,
+               unsigned char queue_priority)
 {
        struct list_head *q;
        struct rpc_task *t;
 
        INIT_LIST_HEAD(&task->u.tk_wait.links);
-       q = &queue->tasks[task->tk_priority];
-       if (unlikely(task->tk_priority > queue->maxpriority))
+       q = &queue->tasks[queue_priority];
+       if (unlikely(queue_priority > queue->maxpriority))
                q = &queue->tasks[queue->maxpriority];
        list_for_each_entry(t, q, u.tk_wait.list) {
                if (t->tk_owner == task->tk_owner) {
@@ -123,12 +125,14 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct r
  * improve overall performance.
  * Everyone else gets appended to the queue to ensure proper FIFO behavior.
  */
-static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
+static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
+               struct rpc_task *task,
+               unsigned char queue_priority)
 {
        BUG_ON (RPC_IS_QUEUED(task));
 
        if (RPC_IS_PRIORITY(queue))
-               __rpc_add_wait_queue_priority(queue, task);
+               __rpc_add_wait_queue_priority(queue, task, queue_priority);
        else if (RPC_IS_SWAPPER(task))
                list_add(&task->u.tk_wait.list, &queue->tasks[0]);
        else
@@ -311,13 +315,15 @@ static void rpc_make_runnable(struct rpc_task *task)
  * NB: An RPC task will only receive interrupt-driven events as long
  * as it's on a wait queue.
  */
-static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
-                       rpc_action action)
+static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
+               struct rpc_task *task,
+               rpc_action action,
+               unsigned char queue_priority)
 {
        dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n",
                        task->tk_pid, rpc_qname(q), jiffies);
 
-       __rpc_add_wait_queue(q, task);
+       __rpc_add_wait_queue(q, task, queue_priority);
 
        BUG_ON(task->tk_callback != NULL);
        task->tk_callback = action;
@@ -334,11 +340,25 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
         * Protect the queue operations.
         */
        spin_lock_bh(&q->lock);
-       __rpc_sleep_on(q, task, action);
+       __rpc_sleep_on_priority(q, task, action, task->tk_priority);
        spin_unlock_bh(&q->lock);
 }
 EXPORT_SYMBOL_GPL(rpc_sleep_on);
 
+void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task,
+               rpc_action action, int priority)
+{
+       /* We shouldn't ever put an inactive task to sleep */
+       BUG_ON(!RPC_IS_ACTIVATED(task));
+
+       /*
+        * Protect the queue operations.
+        */
+       spin_lock_bh(&q->lock);
+       __rpc_sleep_on_priority(q, task, action, priority - RPC_PRIORITY_LOW);
+       spin_unlock_bh(&q->lock);
+}
+
 /**
  * __rpc_do_wake_up_task - wake up a single rpc_task
  * @queue: wait queue
index 2b90292..6a69a11 100644 (file)
@@ -1252,7 +1252,7 @@ svc_process(struct svc_rqst *rqstp)
        }
 }
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 /*
  * Process a backchannel RPC request that arrived over an existing
  * outbound connection
@@ -1300,8 +1300,8 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
                return 0;
        }
 }
-EXPORT_SYMBOL(bc_svc_process);
-#endif /* CONFIG_NFS_V4_1 */
+EXPORT_SYMBOL_GPL(bc_svc_process);
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
 /*
  * Return (transport-specific) limit on the rpc payload.
index f2cb5b8..767d494 100644 (file)
@@ -68,12 +68,12 @@ static void         svc_sock_free(struct svc_xprt *);
 static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
                                          struct net *, struct sockaddr *,
                                          int, int);
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
                                             struct net *, struct sockaddr *,
                                             int, int);
 static void svc_bc_sock_free(struct svc_xprt *xprt);
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key svc_key[2];
@@ -1243,7 +1243,7 @@ static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
        return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
 }
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
                                             struct net *, struct sockaddr *,
                                             int, int);
@@ -1284,7 +1284,7 @@ static void svc_cleanup_bc_xprt_sock(void)
 {
        svc_unreg_xprt_class(&svc_tcp_bc_class);
 }
-#else /* CONFIG_NFS_V4_1 */
+#else /* CONFIG_SUNRPC_BACKCHANNEL */
 static void svc_init_bc_xprt_sock(void)
 {
 }
@@ -1292,7 +1292,7 @@ static void svc_init_bc_xprt_sock(void)
 static void svc_cleanup_bc_xprt_sock(void)
 {
 }
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
 static struct svc_xprt_ops svc_tcp_ops = {
        .xpo_create = svc_tcp_create,
@@ -1623,7 +1623,7 @@ static void svc_sock_free(struct svc_xprt *xprt)
        kfree(svsk);
 }
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 /*
  * Create a back channel svc_xprt which shares the fore channel socket.
  */
@@ -1662,4 +1662,4 @@ static void svc_bc_sock_free(struct svc_xprt *xprt)
        if (xprt)
                kfree(container_of(xprt, struct svc_sock, sk_xprt));
 }
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
index f008c14..277ebd4 100644 (file)
@@ -126,7 +126,7 @@ xdr_terminate_string(struct xdr_buf *buf, const u32 len)
        kaddr[buf->page_base + len] = '\0';
        kunmap_atomic(kaddr, KM_USER0);
 }
-EXPORT_SYMBOL(xdr_terminate_string);
+EXPORT_SYMBOL_GPL(xdr_terminate_string);
 
 void
 xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base,
index ce5eb68..9b6a4d1 100644 (file)
@@ -62,6 +62,7 @@
 /*
  * Local functions
  */
+static void     xprt_init(struct rpc_xprt *xprt, struct net *net);
 static void    xprt_request_init(struct rpc_task *, struct rpc_xprt *);
 static void    xprt_connect_status(struct rpc_task *task);
 static int      __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
@@ -191,10 +192,10 @@ EXPORT_SYMBOL_GPL(xprt_load_transport);
  * transport connects from colliding with writes.  No congestion control
  * is provided.
  */
-int xprt_reserve_xprt(struct rpc_task *task)
+int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
 {
        struct rpc_rqst *req = task->tk_rqstp;
-       struct rpc_xprt *xprt = req->rq_xprt;
+       int priority;
 
        if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
                if (task == xprt->snd_task)
@@ -202,8 +203,10 @@ int xprt_reserve_xprt(struct rpc_task *task)
                goto out_sleep;
        }
        xprt->snd_task = task;
-       req->rq_bytes_sent = 0;
-       req->rq_ntrans++;
+       if (req != NULL) {
+               req->rq_bytes_sent = 0;
+               req->rq_ntrans++;
+       }
 
        return 1;
 
@@ -212,10 +215,13 @@ out_sleep:
                        task->tk_pid, xprt);
        task->tk_timeout = 0;
        task->tk_status = -EAGAIN;
-       if (req->rq_ntrans)
-               rpc_sleep_on(&xprt->resend, task, NULL);
+       if (req == NULL)
+               priority = RPC_PRIORITY_LOW;
+       else if (!req->rq_ntrans)
+               priority = RPC_PRIORITY_NORMAL;
        else
-               rpc_sleep_on(&xprt->sending, task, NULL);
+               priority = RPC_PRIORITY_HIGH;
+       rpc_sleep_on_priority(&xprt->sending, task, NULL, priority);
        return 0;
 }
 EXPORT_SYMBOL_GPL(xprt_reserve_xprt);
@@ -239,22 +245,24 @@ static void xprt_clear_locked(struct rpc_xprt *xprt)
  * integrated into the decision of whether a request is allowed to be
  * woken up and given access to the transport.
  */
-int xprt_reserve_xprt_cong(struct rpc_task *task)
+int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
 {
-       struct rpc_xprt *xprt = task->tk_xprt;
        struct rpc_rqst *req = task->tk_rqstp;
+       int priority;
 
        if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
                if (task == xprt->snd_task)
                        return 1;
                goto out_sleep;
        }
+       if (req == NULL) {
+               xprt->snd_task = task;
+               return 1;
+       }
        if (__xprt_get_cong(xprt, task)) {
                xprt->snd_task = task;
-               if (req) {
-                       req->rq_bytes_sent = 0;
-                       req->rq_ntrans++;
-               }
+               req->rq_bytes_sent = 0;
+               req->rq_ntrans++;
                return 1;
        }
        xprt_clear_locked(xprt);
@@ -262,10 +270,13 @@ out_sleep:
        dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt);
        task->tk_timeout = 0;
        task->tk_status = -EAGAIN;
-       if (req && req->rq_ntrans)
-               rpc_sleep_on(&xprt->resend, task, NULL);
+       if (req == NULL)
+               priority = RPC_PRIORITY_LOW;
+       else if (!req->rq_ntrans)
+               priority = RPC_PRIORITY_NORMAL;
        else
-               rpc_sleep_on(&xprt->sending, task, NULL);
+               priority = RPC_PRIORITY_HIGH;
+       rpc_sleep_on_priority(&xprt->sending, task, NULL, priority);
        return 0;
 }
 EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong);
@@ -275,7 +286,7 @@ static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
        int retval;
 
        spin_lock_bh(&xprt->transport_lock);
-       retval = xprt->ops->reserve_xprt(task);
+       retval = xprt->ops->reserve_xprt(xprt, task);
        spin_unlock_bh(&xprt->transport_lock);
        return retval;
 }
@@ -288,12 +299,9 @@ static void __xprt_lock_write_next(struct rpc_xprt *xprt)
        if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
                return;
 
-       task = rpc_wake_up_next(&xprt->resend);
-       if (!task) {
-               task = rpc_wake_up_next(&xprt->sending);
-               if (!task)
-                       goto out_unlock;
-       }
+       task = rpc_wake_up_next(&xprt->sending);
+       if (task == NULL)
+               goto out_unlock;
 
        req = task->tk_rqstp;
        xprt->snd_task = task;
@@ -310,24 +318,25 @@ out_unlock:
 static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
 {
        struct rpc_task *task;
+       struct rpc_rqst *req;
 
        if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
                return;
        if (RPCXPRT_CONGESTED(xprt))
                goto out_unlock;
-       task = rpc_wake_up_next(&xprt->resend);
-       if (!task) {
-               task = rpc_wake_up_next(&xprt->sending);
-               if (!task)
-                       goto out_unlock;
+       task = rpc_wake_up_next(&xprt->sending);
+       if (task == NULL)
+               goto out_unlock;
+
+       req = task->tk_rqstp;
+       if (req == NULL) {
+               xprt->snd_task = task;
+               return;
        }
        if (__xprt_get_cong(xprt, task)) {
-               struct rpc_rqst *req = task->tk_rqstp;
                xprt->snd_task = task;
-               if (req) {
-                       req->rq_bytes_sent = 0;
-                       req->rq_ntrans++;
-               }
+               req->rq_bytes_sent = 0;
+               req->rq_ntrans++;
                return;
        }
 out_unlock:
@@ -852,7 +861,7 @@ int xprt_prepare_transmit(struct rpc_task *task)
                err = req->rq_reply_bytes_recvd;
                goto out_unlock;
        }
-       if (!xprt->ops->reserve_xprt(task))
+       if (!xprt->ops->reserve_xprt(xprt, task))
                err = -EAGAIN;
 out_unlock:
        spin_unlock_bh(&xprt->transport_lock);
@@ -928,28 +937,66 @@ void xprt_transmit(struct rpc_task *task)
        spin_unlock_bh(&xprt->transport_lock);
 }
 
+static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags)
+{
+       struct rpc_rqst *req = ERR_PTR(-EAGAIN);
+
+       if (!atomic_add_unless(&xprt->num_reqs, 1, xprt->max_reqs))
+               goto out;
+       req = kzalloc(sizeof(struct rpc_rqst), gfp_flags);
+       if (req != NULL)
+               goto out;
+       atomic_dec(&xprt->num_reqs);
+       req = ERR_PTR(-ENOMEM);
+out:
+       return req;
+}
+
+static bool xprt_dynamic_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
+{
+       if (atomic_add_unless(&xprt->num_reqs, -1, xprt->min_reqs)) {
+               kfree(req);
+               return true;
+       }
+       return false;
+}
+
 static void xprt_alloc_slot(struct rpc_task *task)
 {
        struct rpc_xprt *xprt = task->tk_xprt;
+       struct rpc_rqst *req;
 
-       task->tk_status = 0;
-       if (task->tk_rqstp)
-               return;
        if (!list_empty(&xprt->free)) {
-               struct rpc_rqst *req = list_entry(xprt->free.next, struct rpc_rqst, rq_list);
-               list_del_init(&req->rq_list);
-               task->tk_rqstp = req;
-               xprt_request_init(task, xprt);
-               return;
+               req = list_entry(xprt->free.next, struct rpc_rqst, rq_list);
+               list_del(&req->rq_list);
+               goto out_init_req;
+       }
+       req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT);
+       if (!IS_ERR(req))
+               goto out_init_req;
+       switch (PTR_ERR(req)) {
+       case -ENOMEM:
+               rpc_delay(task, HZ >> 2);
+               dprintk("RPC:       dynamic allocation of request slot "
+                               "failed! Retrying\n");
+               break;
+       case -EAGAIN:
+               rpc_sleep_on(&xprt->backlog, task, NULL);
+               dprintk("RPC:       waiting for request slot\n");
        }
-       dprintk("RPC:       waiting for request slot\n");
        task->tk_status = -EAGAIN;
-       task->tk_timeout = 0;
-       rpc_sleep_on(&xprt->backlog, task, NULL);
+       return;
+out_init_req:
+       task->tk_status = 0;
+       task->tk_rqstp = req;
+       xprt_request_init(task, xprt);
 }
 
 static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
 {
+       if (xprt_dynamic_free_slot(xprt, req))
+               return;
+
        memset(req, 0, sizeof(*req));   /* mark unused */
 
        spin_lock(&xprt->reserve_lock);
@@ -958,25 +1005,49 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
        spin_unlock(&xprt->reserve_lock);
 }
 
-struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req)
+static void xprt_free_all_slots(struct rpc_xprt *xprt)
+{
+       struct rpc_rqst *req;
+       while (!list_empty(&xprt->free)) {
+               req = list_first_entry(&xprt->free, struct rpc_rqst, rq_list);
+               list_del(&req->rq_list);
+               kfree(req);
+       }
+}
+
+struct rpc_xprt *xprt_alloc(struct net *net, size_t size,
+               unsigned int num_prealloc,
+               unsigned int max_alloc)
 {
        struct rpc_xprt *xprt;
+       struct rpc_rqst *req;
+       int i;
 
        xprt = kzalloc(size, GFP_KERNEL);
        if (xprt == NULL)
                goto out;
-       atomic_set(&xprt->count, 1);
 
-       xprt->max_reqs = max_req;
-       xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL);
-       if (xprt->slot == NULL)
+       xprt_init(xprt, net);
+
+       for (i = 0; i < num_prealloc; i++) {
+               req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL);
+               if (!req)
+                       break;
+               list_add(&req->rq_list, &xprt->free);
+       }
+       if (i < num_prealloc)
                goto out_free;
+       if (max_alloc > num_prealloc)
+               xprt->max_reqs = max_alloc;
+       else
+               xprt->max_reqs = num_prealloc;
+       xprt->min_reqs = num_prealloc;
+       atomic_set(&xprt->num_reqs, num_prealloc);
 
-       xprt->xprt_net = get_net(net);
        return xprt;
 
 out_free:
-       kfree(xprt);
+       xprt_free(xprt);
 out:
        return NULL;
 }
@@ -985,7 +1056,7 @@ EXPORT_SYMBOL_GPL(xprt_alloc);
 void xprt_free(struct rpc_xprt *xprt)
 {
        put_net(xprt->xprt_net);
-       kfree(xprt->slot);
+       xprt_free_all_slots(xprt);
        kfree(xprt);
 }
 EXPORT_SYMBOL_GPL(xprt_free);
@@ -1001,10 +1072,24 @@ void xprt_reserve(struct rpc_task *task)
 {
        struct rpc_xprt *xprt = task->tk_xprt;
 
-       task->tk_status = -EIO;
+       task->tk_status = 0;
+       if (task->tk_rqstp != NULL)
+               return;
+
+       /* Note: grabbing the xprt_lock_write() here is not strictly needed,
+        * but ensures that we throttle new slot allocation if the transport
+        * is congested (e.g. if reconnecting or if we're out of socket
+        * write buffer space).
+        */
+       task->tk_timeout = 0;
+       task->tk_status = -EAGAIN;
+       if (!xprt_lock_write(xprt, task))
+               return;
+
        spin_lock(&xprt->reserve_lock);
        xprt_alloc_slot(task);
        spin_unlock(&xprt->reserve_lock);
+       xprt_release_write(xprt, task);
 }
 
 static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
@@ -1021,6 +1106,7 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
 {
        struct rpc_rqst *req = task->tk_rqstp;
 
+       INIT_LIST_HEAD(&req->rq_list);
        req->rq_timeout = task->tk_client->cl_timeout->to_initval;
        req->rq_task    = task;
        req->rq_xprt    = xprt;
@@ -1073,6 +1159,34 @@ void xprt_release(struct rpc_task *task)
                xprt_free_bc_request(req);
 }
 
+static void xprt_init(struct rpc_xprt *xprt, struct net *net)
+{
+       atomic_set(&xprt->count, 1);
+
+       spin_lock_init(&xprt->transport_lock);
+       spin_lock_init(&xprt->reserve_lock);
+
+       INIT_LIST_HEAD(&xprt->free);
+       INIT_LIST_HEAD(&xprt->recv);
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+       spin_lock_init(&xprt->bc_pa_lock);
+       INIT_LIST_HEAD(&xprt->bc_pa_list);
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
+
+       xprt->last_used = jiffies;
+       xprt->cwnd = RPC_INITCWND;
+       xprt->bind_index = 0;
+
+       rpc_init_wait_queue(&xprt->binding, "xprt_binding");
+       rpc_init_wait_queue(&xprt->pending, "xprt_pending");
+       rpc_init_priority_wait_queue(&xprt->sending, "xprt_sending");
+       rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog");
+
+       xprt_init_xid(xprt);
+
+       xprt->xprt_net = get_net(net);
+}
+
 /**
  * xprt_create_transport - create an RPC transport
  * @args: rpc transport creation arguments
@@ -1081,7 +1195,6 @@ void xprt_release(struct rpc_task *task)
 struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
 {
        struct rpc_xprt *xprt;
-       struct rpc_rqst *req;
        struct xprt_class *t;
 
        spin_lock(&xprt_list_lock);
@@ -1100,46 +1213,17 @@ found:
        if (IS_ERR(xprt)) {
                dprintk("RPC:       xprt_create_transport: failed, %ld\n",
                                -PTR_ERR(xprt));
-               return xprt;
+               goto out;
        }
-       if (test_and_set_bit(XPRT_INITIALIZED, &xprt->state))
-               /* ->setup returned a pre-initialized xprt: */
-               return xprt;
-
-       spin_lock_init(&xprt->transport_lock);
-       spin_lock_init(&xprt->reserve_lock);
-
-       INIT_LIST_HEAD(&xprt->free);
-       INIT_LIST_HEAD(&xprt->recv);
-#if defined(CONFIG_NFS_V4_1)
-       spin_lock_init(&xprt->bc_pa_lock);
-       INIT_LIST_HEAD(&xprt->bc_pa_list);
-#endif /* CONFIG_NFS_V4_1 */
-
        INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
        if (xprt_has_timer(xprt))
                setup_timer(&xprt->timer, xprt_init_autodisconnect,
                            (unsigned long)xprt);
        else
                init_timer(&xprt->timer);
-       xprt->last_used = jiffies;
-       xprt->cwnd = RPC_INITCWND;
-       xprt->bind_index = 0;
-
-       rpc_init_wait_queue(&xprt->binding, "xprt_binding");
-       rpc_init_wait_queue(&xprt->pending, "xprt_pending");
-       rpc_init_wait_queue(&xprt->sending, "xprt_sending");
-       rpc_init_wait_queue(&xprt->resend, "xprt_resend");
-       rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog");
-
-       /* initialize free list */
-       for (req = &xprt->slot[xprt->max_reqs-1]; req >= &xprt->slot[0]; req--)
-               list_add(&req->rq_list, &xprt->free);
-
-       xprt_init_xid(xprt);
-
        dprintk("RPC:       created transport %p with %u slots\n", xprt,
                        xprt->max_reqs);
+out:
        return xprt;
 }
 
@@ -1157,7 +1241,6 @@ static void xprt_destroy(struct rpc_xprt *xprt)
        rpc_destroy_wait_queue(&xprt->binding);
        rpc_destroy_wait_queue(&xprt->pending);
        rpc_destroy_wait_queue(&xprt->sending);
-       rpc_destroy_wait_queue(&xprt->resend);
        rpc_destroy_wait_queue(&xprt->backlog);
        cancel_work_sync(&xprt->task_cleanup);
        /*
index 0867070..b446e10 100644 (file)
@@ -283,6 +283,7 @@ xprt_setup_rdma(struct xprt_create *args)
        }
 
        xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt),
+                       xprt_rdma_slot_table_entries,
                        xprt_rdma_slot_table_entries);
        if (xprt == NULL) {
                dprintk("RPC:       %s: couldn't allocate rpcrdma_xprt\n",
@@ -452,9 +453,8 @@ xprt_rdma_connect(struct rpc_task *task)
 }
 
 static int
-xprt_rdma_reserve_xprt(struct rpc_task *task)
+xprt_rdma_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
 {
-       struct rpc_xprt *xprt = task->tk_xprt;
        struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
        int credits = atomic_read(&r_xprt->rx_buf.rb_credits);
 
@@ -466,7 +466,7 @@ xprt_rdma_reserve_xprt(struct rpc_task *task)
                BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0);
        }
        xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale;
-       return xprt_reserve_xprt_cong(task);
+       return xprt_reserve_xprt_cong(xprt, task);
 }
 
 /*
index ddf0528..08c5d5a 100644 (file)
@@ -109,7 +109,7 @@ struct rpcrdma_ep {
  */
 
 /* temporary static scatter/gather max */
-#define RPCRDMA_MAX_DATA_SEGS  (8)     /* max scatter/gather */
+#define RPCRDMA_MAX_DATA_SEGS  (64)    /* max scatter/gather */
 #define RPCRDMA_MAX_SEGS       (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */
 #define MAX_RPCRDMAHDR (\
        /* max supported RPC/RDMA header */ \
index 72abb73..d7f97ef 100644 (file)
@@ -37,7 +37,7 @@
 #include <linux/sunrpc/svcsock.h>
 #include <linux/sunrpc/xprtsock.h>
 #include <linux/file.h>
-#ifdef CONFIG_NFS_V4_1
+#ifdef CONFIG_SUNRPC_BACKCHANNEL
 #include <linux/sunrpc/bc_xprt.h>
 #endif
 
@@ -54,7 +54,8 @@ static void xs_close(struct rpc_xprt *xprt);
  * xprtsock tunables
  */
 unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE;
-unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE;
+unsigned int xprt_tcp_slot_table_entries = RPC_MIN_SLOT_TABLE;
+unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE;
 
 unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
 unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
@@ -75,6 +76,7 @@ static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO;
 
 static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE;
 static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE;
+static unsigned int max_tcp_slot_table_limit = RPC_MAX_SLOT_TABLE_LIMIT;
 static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT;
 static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT;
 
@@ -104,6 +106,15 @@ static ctl_table xs_tunables_table[] = {
                .extra2         = &max_slot_table_size
        },
        {
+               .procname       = "tcp_max_slot_table_entries",
+               .data           = &xprt_max_tcp_slot_table_entries,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &min_slot_table_size,
+               .extra2         = &max_tcp_slot_table_limit
+       },
+       {
                .procname       = "min_resvport",
                .data           = &xprt_min_resvport,
                .maxlen         = sizeof(unsigned int),
@@ -755,6 +766,8 @@ static void xs_tcp_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
        if (task == NULL)
                goto out_release;
        req = task->tk_rqstp;
+       if (req == NULL)
+               goto out_release;
        if (req->rq_bytes_sent == 0)
                goto out_release;
        if (req->rq_bytes_sent == req->rq_snd_buf.len)
@@ -1236,7 +1249,7 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
        return 0;
 }
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 /*
  * Obtains an rpc_rqst previously allocated and invokes the common
  * tcp read code to read the data.  The result is placed in the callback
@@ -1299,7 +1312,7 @@ static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
 {
        return xs_tcp_read_reply(xprt, desc);
 }
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
 /*
  * Read data off the transport.  This can be either an RPC_CALL or an
@@ -2489,7 +2502,8 @@ static int xs_init_anyaddr(const int family, struct sockaddr *sap)
 }
 
 static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
-                                     unsigned int slot_table_size)
+                                     unsigned int slot_table_size,
+                                     unsigned int max_slot_table_size)
 {
        struct rpc_xprt *xprt;
        struct sock_xprt *new;
@@ -2499,7 +2513,8 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
                return ERR_PTR(-EBADF);
        }
 
-       xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size);
+       xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size,
+                       max_slot_table_size);
        if (xprt == NULL) {
                dprintk("RPC:       xs_setup_xprt: couldn't allocate "
                                "rpc_xprt\n");
@@ -2541,7 +2556,8 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
        struct rpc_xprt *xprt;
        struct rpc_xprt *ret;
 
-       xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
+       xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
+                       xprt_max_tcp_slot_table_entries);
        if (IS_ERR(xprt))
                return xprt;
        transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2605,7 +2621,8 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
        struct sock_xprt *transport;
        struct rpc_xprt *ret;
 
-       xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries);
+       xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries,
+                       xprt_udp_slot_table_entries);
        if (IS_ERR(xprt))
                return xprt;
        transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2681,7 +2698,8 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
        struct sock_xprt *transport;
        struct rpc_xprt *ret;
 
-       xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
+       xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
+                       xprt_max_tcp_slot_table_entries);
        if (IS_ERR(xprt))
                return xprt;
        transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2760,7 +2778,8 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
                 */
                 return args->bc_xprt->xpt_bc_xprt;
        }
-       xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
+       xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
+                       xprt_tcp_slot_table_entries);
        if (IS_ERR(xprt))
                return xprt;
        transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2947,8 +2966,26 @@ static struct kernel_param_ops param_ops_slot_table_size = {
 #define param_check_slot_table_size(name, p) \
        __param_check(name, p, unsigned int);
 
+static int param_set_max_slot_table_size(const char *val,
+                                    const struct kernel_param *kp)
+{
+       return param_set_uint_minmax(val, kp,
+                       RPC_MIN_SLOT_TABLE,
+                       RPC_MAX_SLOT_TABLE_LIMIT);
+}
+
+static struct kernel_param_ops param_ops_max_slot_table_size = {
+       .set = param_set_max_slot_table_size,
+       .get = param_get_uint,
+};
+
+#define param_check_max_slot_table_size(name, p) \
+       __param_check(name, p, unsigned int);
+
 module_param_named(tcp_slot_table_entries, xprt_tcp_slot_table_entries,
                   slot_table_size, 0644);
+module_param_named(tcp_max_slot_table_entries, xprt_max_tcp_slot_table_entries,
+                  max_slot_table_size, 0644);
 module_param_named(udp_slot_table_entries, xprt_udp_slot_table_entries,
                   slot_table_size, 0644);
 
index 1ad0f39..02751db 100644 (file)
@@ -903,7 +903,7 @@ static bool ignore_reg_update(struct wiphy *wiphy,
            initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE &&
            !is_world_regdom(last_request->alpha2)) {
                REG_DBG_PRINT("Ignoring regulatory request %s "
-                             "since the driver requires its own regulaotry "
+                             "since the driver requires its own regulatory "
                              "domain to be set first",
                              reg_initiator_name(initiator));
                return true;
@@ -1125,12 +1125,13 @@ void wiphy_update_regulatory(struct wiphy *wiphy,
        enum ieee80211_band band;
 
        if (ignore_reg_update(wiphy, initiator))
-               goto out;
+               return;
+
        for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
                if (wiphy->bands[band])
                        handle_band(wiphy, band, initiator);
        }
-out:
+
        reg_process_beacons(wiphy);
        reg_process_ht_flags(wiphy);
        if (wiphy->reg_notifier)
index 7312bf9..c1e18ba 100644 (file)
@@ -73,7 +73,6 @@ static int may_change_ptraced_domain(struct task_struct *task,
                cred = get_task_cred(tracer);
                tracerp = aa_cred_profile(cred);
        }
-       rcu_read_unlock();
 
        /* not ptraced */
        if (!tracer || unconfined(tracerp))
@@ -82,6 +81,7 @@ static int may_change_ptraced_domain(struct task_struct *task,
        error = aa_may_ptrace(tracer, tracerp, to_profile, PTRACE_MODE_ATTACH);
 
 out:
+       rcu_read_unlock();
        if (cred)
                put_cred(cred);
 
index 3d2fd14..3783202 100644 (file)
@@ -127,7 +127,7 @@ static int apparmor_capget(struct task_struct *target, kernel_cap_t *effective,
        *inheritable = cred->cap_inheritable;
        *permitted = cred->cap_permitted;
 
-       if (!unconfined(profile)) {
+       if (!unconfined(profile) && !COMPLAIN_MODE(profile)) {
                *effective = cap_intersect(*effective, profile->caps.allow);
                *permitted = cap_intersect(*permitted, profile->caps.allow);
        }
index 39d66dc..26b46ff 100644 (file)
@@ -86,7 +86,7 @@ static void ima_check_last_writer(struct ima_iint_cache *iint,
                                  struct inode *inode,
                                  struct file *file)
 {
-       mode_t mode = file->f_mode;
+       fmode_t mode = file->f_mode;
 
        mutex_lock(&iint->mutex);
        if (mode & FMODE_WRITE &&
index 1bf090a..b34cc6e 100644 (file)
@@ -14,7 +14,7 @@ obj-y := \
        user_defined.o
 
 obj-$(CONFIG_TRUSTED_KEYS) += trusted.o
-obj-$(CONFIG_ENCRYPTED_KEYS) += encrypted.o
+obj-$(CONFIG_ENCRYPTED_KEYS) += ecryptfs_format.o encrypted.o
 obj-$(CONFIG_KEYS_COMPAT) += compat.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/security/keys/ecryptfs_format.c b/security/keys/ecryptfs_format.c
new file mode 100644 (file)
index 0000000..6daa3b6
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * ecryptfs_format.c: helper functions for the encrypted key type
+ *
+ * Copyright (C) 2006 International Business Machines Corp.
+ * Copyright (C) 2010 Politecnico di Torino, Italy
+ *                    TORSEC group -- http://security.polito.it
+ *
+ * Authors:
+ * Michael A. Halcrow <mahalcro@us.ibm.com>
+ * Tyler Hicks <tyhicks@ou.edu>
+ * Roberto Sassu <roberto.sassu@polito.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 2 of the License.
+ */
+
+#include <linux/module.h>
+#include "ecryptfs_format.h"
+
+u8 *ecryptfs_get_auth_tok_key(struct ecryptfs_auth_tok *auth_tok)
+{
+       return auth_tok->token.password.session_key_encryption_key;
+}
+EXPORT_SYMBOL(ecryptfs_get_auth_tok_key);
+
+/*
+ * ecryptfs_get_versions()
+ *
+ * Source code taken from the software 'ecryptfs-utils' version 83.
+ *
+ */
+void ecryptfs_get_versions(int *major, int *minor, int *file_version)
+{
+       *major = ECRYPTFS_VERSION_MAJOR;
+       *minor = ECRYPTFS_VERSION_MINOR;
+       if (file_version)
+               *file_version = ECRYPTFS_SUPPORTED_FILE_VERSION;
+}
+EXPORT_SYMBOL(ecryptfs_get_versions);
+
+/*
+ * ecryptfs_fill_auth_tok - fill the ecryptfs_auth_tok structure
+ *
+ * Fill the ecryptfs_auth_tok structure with required ecryptfs data.
+ * The source code is inspired to the original function generate_payload()
+ * shipped with the software 'ecryptfs-utils' version 83.
+ *
+ */
+int ecryptfs_fill_auth_tok(struct ecryptfs_auth_tok *auth_tok,
+                          const char *key_desc)
+{
+       int major, minor;
+
+       ecryptfs_get_versions(&major, &minor, NULL);
+       auth_tok->version = (((uint16_t)(major << 8) & 0xFF00)
+                            | ((uint16_t)minor & 0x00FF));
+       auth_tok->token_type = ECRYPTFS_PASSWORD;
+       strncpy((char *)auth_tok->token.password.signature, key_desc,
+               ECRYPTFS_PASSWORD_SIG_SIZE);
+       auth_tok->token.password.session_key_encryption_key_bytes =
+               ECRYPTFS_MAX_KEY_BYTES;
+       /*
+        * Removed auth_tok->token.password.salt and
+        * auth_tok->token.password.session_key_encryption_key
+        * initialization from the original code
+        */
+       /* TODO: Make the hash parameterizable via policy */
+       auth_tok->token.password.flags |=
+               ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET;
+       /* The kernel code will encrypt the session key. */
+       auth_tok->session_key.encrypted_key[0] = 0;
+       auth_tok->session_key.encrypted_key_size = 0;
+       /* Default; subject to change by kernel eCryptfs */
+       auth_tok->token.password.hash_algo = PGP_DIGEST_ALGO_SHA512;
+       auth_tok->token.password.flags &= ~(ECRYPTFS_PERSISTENT_PASSWORD);
+       return 0;
+}
+EXPORT_SYMBOL(ecryptfs_fill_auth_tok);
+
+MODULE_LICENSE("GPL");
diff --git a/security/keys/ecryptfs_format.h b/security/keys/ecryptfs_format.h
new file mode 100644 (file)
index 0000000..40294de
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * ecryptfs_format.h: helper functions for the encrypted key type
+ *
+ * Copyright (C) 2006 International Business Machines Corp.
+ * Copyright (C) 2010 Politecnico di Torino, Italy
+ *                    TORSEC group -- http://security.polito.it
+ *
+ * Authors:
+ * Michael A. Halcrow <mahalcro@us.ibm.com>
+ * Tyler Hicks <tyhicks@ou.edu>
+ * Roberto Sassu <roberto.sassu@polito.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 2 of the License.
+ */
+
+#ifndef __KEYS_ECRYPTFS_H
+#define __KEYS_ECRYPTFS_H
+
+#include <linux/ecryptfs.h>
+
+#define PGP_DIGEST_ALGO_SHA512   10
+
+u8 *ecryptfs_get_auth_tok_key(struct ecryptfs_auth_tok *auth_tok);
+void ecryptfs_get_versions(int *major, int *minor, int *file_version);
+int ecryptfs_fill_auth_tok(struct ecryptfs_auth_tok *auth_tok,
+                          const char *key_desc);
+
+#endif /* __KEYS_ECRYPTFS_H */
index b1cba5b..e7eca9e 100644 (file)
@@ -1,8 +1,11 @@
 /*
  * Copyright (C) 2010 IBM Corporation
+ * Copyright (C) 2010 Politecnico di Torino, Italy
+ *                    TORSEC group -- http://security.polito.it
  *
- * Author:
+ * Authors:
  * Mimi Zohar <zohar@us.ibm.com>
+ * Roberto Sassu <roberto.sassu@polito.it>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
 #include <linux/rcupdate.h>
 #include <linux/scatterlist.h>
 #include <linux/crypto.h>
+#include <linux/ctype.h>
 #include <crypto/hash.h>
 #include <crypto/sha.h>
 #include <crypto/aes.h>
 
 #include "encrypted.h"
+#include "ecryptfs_format.h"
 
 static const char KEY_TRUSTED_PREFIX[] = "trusted:";
 static const char KEY_USER_PREFIX[] = "user:";
 static const char hash_alg[] = "sha256";
 static const char hmac_alg[] = "hmac(sha256)";
 static const char blkcipher_alg[] = "cbc(aes)";
+static const char key_format_default[] = "default";
+static const char key_format_ecryptfs[] = "ecryptfs";
 static unsigned int ivsize;
 static int blksize;
 
 #define KEY_TRUSTED_PREFIX_LEN (sizeof (KEY_TRUSTED_PREFIX) - 1)
 #define KEY_USER_PREFIX_LEN (sizeof (KEY_USER_PREFIX) - 1)
+#define KEY_ECRYPTFS_DESC_LEN 16
 #define HASH_SIZE SHA256_DIGEST_SIZE
 #define MAX_DATA_SIZE 4096
 #define MIN_DATA_SIZE  20
@@ -58,6 +66,16 @@ enum {
        Opt_err = -1, Opt_new, Opt_load, Opt_update
 };
 
+enum {
+       Opt_error = -1, Opt_default, Opt_ecryptfs
+};
+
+static const match_table_t key_format_tokens = {
+       {Opt_default, "default"},
+       {Opt_ecryptfs, "ecryptfs"},
+       {Opt_error, NULL}
+};
+
 static const match_table_t key_tokens = {
        {Opt_new, "new"},
        {Opt_load, "load"},
@@ -82,9 +100,37 @@ static int aes_get_sizes(void)
 }
 
 /*
+ * valid_ecryptfs_desc - verify the description of a new/loaded encrypted key
+ *
+ * The description of a encrypted key with format 'ecryptfs' must contain
+ * exactly 16 hexadecimal characters.
+ *
+ */
+static int valid_ecryptfs_desc(const char *ecryptfs_desc)
+{
+       int i;
+
+       if (strlen(ecryptfs_desc) != KEY_ECRYPTFS_DESC_LEN) {
+               pr_err("encrypted_key: key description must be %d hexadecimal "
+                      "characters long\n", KEY_ECRYPTFS_DESC_LEN);
+               return -EINVAL;
+       }
+
+       for (i = 0; i < KEY_ECRYPTFS_DESC_LEN; i++) {
+               if (!isxdigit(ecryptfs_desc[i])) {
+                       pr_err("encrypted_key: key description must contain "
+                              "only hexadecimal characters\n");
+                       return -EINVAL;
+               }
+       }
+
+       return 0;
+}
+
+/*
  * valid_master_desc - verify the 'key-type:desc' of a new/updated master-key
  *
- * key-type:= "trusted:" | "encrypted:"
+ * key-type:= "trusted:" | "user:"
  * desc:= master-key description
  *
  * Verify that 'key-type' is valid and that 'desc' exists. On key update,
@@ -118,8 +164,9 @@ out:
  * datablob_parse - parse the keyctl data
  *
  * datablob format:
- * new <master-key name> <decrypted data length>
- * load <master-key name> <decrypted data length> <encrypted iv + data>
+ * new [<format>] <master-key name> <decrypted data length>
+ * load [<format>] <master-key name> <decrypted data length>
+ *     <encrypted iv + data>
  * update <new-master-key name>
  *
  * Tokenizes a copy of the keyctl data, returning a pointer to each token,
@@ -127,52 +174,95 @@ out:
  *
  * On success returns 0, otherwise -EINVAL.
  */
-static int datablob_parse(char *datablob, char **master_desc,
-                         char **decrypted_datalen, char **hex_encoded_iv)
+static int datablob_parse(char *datablob, const char **format,
+                         char **master_desc, char **decrypted_datalen,
+                         char **hex_encoded_iv)
 {
        substring_t args[MAX_OPT_ARGS];
        int ret = -EINVAL;
        int key_cmd;
-       char *p;
+       int key_format;
+       char *p, *keyword;
+
+       keyword = strsep(&datablob, " \t");
+       if (!keyword) {
+               pr_info("encrypted_key: insufficient parameters specified\n");
+               return ret;
+       }
+       key_cmd = match_token(keyword, key_tokens, args);
 
+       /* Get optional format: default | ecryptfs */
        p = strsep(&datablob, " \t");
-       if (!p)
+       if (!p) {
+               pr_err("encrypted_key: insufficient parameters specified\n");
                return ret;
-       key_cmd = match_token(p, key_tokens, args);
+       }
 
-       *master_desc = strsep(&datablob, " \t");
-       if (!*master_desc)
+       key_format = match_token(p, key_format_tokens, args);
+       switch (key_format) {
+       case Opt_ecryptfs:
+       case Opt_default:
+               *format = p;
+               *master_desc = strsep(&datablob, " \t");
+               break;
+       case Opt_error:
+               *master_desc = p;
+               break;
+       }
+
+       if (!*master_desc) {
+               pr_info("encrypted_key: master key parameter is missing\n");
                goto out;
+       }
 
-       if (valid_master_desc(*master_desc, NULL) < 0)
+       if (valid_master_desc(*master_desc, NULL) < 0) {
+               pr_info("encrypted_key: master key parameter \'%s\' "
+                       "is invalid\n", *master_desc);
                goto out;
+       }
 
        if (decrypted_datalen) {
                *decrypted_datalen = strsep(&datablob, " \t");
-               if (!*decrypted_datalen)
+               if (!*decrypted_datalen) {
+                       pr_info("encrypted_key: keylen parameter is missing\n");
                        goto out;
+               }
        }
 
        switch (key_cmd) {
        case Opt_new:
-               if (!decrypted_datalen)
+               if (!decrypted_datalen) {
+                       pr_info("encrypted_key: keyword \'%s\' not allowed "
+                               "when called from .update method\n", keyword);
                        break;
+               }
                ret = 0;
                break;
        case Opt_load:
-               if (!decrypted_datalen)
+               if (!decrypted_datalen) {
+                       pr_info("encrypted_key: keyword \'%s\' not allowed "
+                               "when called from .update method\n", keyword);
                        break;
+               }
                *hex_encoded_iv = strsep(&datablob, " \t");
-               if (!*hex_encoded_iv)
+               if (!*hex_encoded_iv) {
+                       pr_info("encrypted_key: hex blob is missing\n");
                        break;
+               }
                ret = 0;
                break;
        case Opt_update:
-               if (decrypted_datalen)
+               if (decrypted_datalen) {
+                       pr_info("encrypted_key: keyword \'%s\' not allowed "
+                               "when called from .instantiate method\n",
+                               keyword);
                        break;
+               }
                ret = 0;
                break;
        case Opt_err:
+               pr_info("encrypted_key: keyword \'%s\' not recognized\n",
+                       keyword);
                break;
        }
 out:
@@ -197,8 +287,8 @@ static char *datablob_format(struct encrypted_key_payload *epayload,
        ascii_buf[asciiblob_len] = '\0';
 
        /* copy datablob master_desc and datalen strings */
-       len = sprintf(ascii_buf, "%s %s ", epayload->master_desc,
-                     epayload->datalen);
+       len = sprintf(ascii_buf, "%s %s %s ", epayload->format,
+                     epayload->master_desc, epayload->datalen);
 
        /* convert the hex encoded iv, encrypted-data and HMAC to ascii */
        bufp = &ascii_buf[len];
@@ -378,11 +468,13 @@ static struct key *request_master_key(struct encrypted_key_payload *epayload,
        } else
                goto out;
 
-       if (IS_ERR(mkey))
+       if (IS_ERR(mkey)) {
                pr_info("encrypted_key: key %s not found",
                        epayload->master_desc);
-       if (mkey)
-               dump_master_key(*master_key, *master_keylen);
+               goto out;
+       }
+
+       dump_master_key(*master_key, *master_keylen);
 out:
        return mkey;
 }
@@ -439,9 +531,9 @@ static int datablob_hmac_append(struct encrypted_key_payload *epayload,
        if (ret < 0)
                goto out;
 
-       digest = epayload->master_desc + epayload->datablob_len;
+       digest = epayload->format + epayload->datablob_len;
        ret = calc_hmac(digest, derived_key, sizeof derived_key,
-                       epayload->master_desc, epayload->datablob_len);
+                       epayload->format, epayload->datablob_len);
        if (!ret)
                dump_hmac(NULL, digest, HASH_SIZE);
 out:
@@ -450,26 +542,35 @@ out:
 
 /* verify HMAC before decrypting encrypted key */
 static int datablob_hmac_verify(struct encrypted_key_payload *epayload,
-                               const u8 *master_key, size_t master_keylen)
+                               const u8 *format, const u8 *master_key,
+                               size_t master_keylen)
 {
        u8 derived_key[HASH_SIZE];
        u8 digest[HASH_SIZE];
        int ret;
+       char *p;
+       unsigned short len;
 
        ret = get_derived_key(derived_key, AUTH_KEY, master_key, master_keylen);
        if (ret < 0)
                goto out;
 
-       ret = calc_hmac(digest, derived_key, sizeof derived_key,
-                       epayload->master_desc, epayload->datablob_len);
+       len = epayload->datablob_len;
+       if (!format) {
+               p = epayload->master_desc;
+               len -= strlen(epayload->format) + 1;
+       } else
+               p = epayload->format;
+
+       ret = calc_hmac(digest, derived_key, sizeof derived_key, p, len);
        if (ret < 0)
                goto out;
-       ret = memcmp(digest, epayload->master_desc + epayload->datablob_len,
+       ret = memcmp(digest, epayload->format + epayload->datablob_len,
                     sizeof digest);
        if (ret) {
                ret = -EINVAL;
                dump_hmac("datablob",
-                         epayload->master_desc + epayload->datablob_len,
+                         epayload->format + epayload->datablob_len,
                          HASH_SIZE);
                dump_hmac("calc", digest, HASH_SIZE);
        }
@@ -514,13 +615,16 @@ out:
 
 /* Allocate memory for decrypted key and datablob. */
 static struct encrypted_key_payload *encrypted_key_alloc(struct key *key,
+                                                        const char *format,
                                                         const char *master_desc,
                                                         const char *datalen)
 {
        struct encrypted_key_payload *epayload = NULL;
        unsigned short datablob_len;
        unsigned short decrypted_datalen;
+       unsigned short payload_datalen;
        unsigned int encrypted_datalen;
+       unsigned int format_len;
        long dlen;
        int ret;
 
@@ -528,29 +632,43 @@ static struct encrypted_key_payload *encrypted_key_alloc(struct key *key,
        if (ret < 0 || dlen < MIN_DATA_SIZE || dlen > MAX_DATA_SIZE)
                return ERR_PTR(-EINVAL);
 
+       format_len = (!format) ? strlen(key_format_default) : strlen(format);
        decrypted_datalen = dlen;
+       payload_datalen = decrypted_datalen;
+       if (format && !strcmp(format, key_format_ecryptfs)) {
+               if (dlen != ECRYPTFS_MAX_KEY_BYTES) {
+                       pr_err("encrypted_key: keylen for the ecryptfs format "
+                              "must be equal to %d bytes\n",
+                              ECRYPTFS_MAX_KEY_BYTES);
+                       return ERR_PTR(-EINVAL);
+               }
+               decrypted_datalen = ECRYPTFS_MAX_KEY_BYTES;
+               payload_datalen = sizeof(struct ecryptfs_auth_tok);
+       }
+
        encrypted_datalen = roundup(decrypted_datalen, blksize);
 
-       datablob_len = strlen(master_desc) + 1 + strlen(datalen) + 1
-           + ivsize + 1 + encrypted_datalen;
+       datablob_len = format_len + 1 + strlen(master_desc) + 1
+           + strlen(datalen) + 1 + ivsize + 1 + encrypted_datalen;
 
-       ret = key_payload_reserve(key, decrypted_datalen + datablob_len
+       ret = key_payload_reserve(key, payload_datalen + datablob_len
                                  + HASH_SIZE + 1);
        if (ret < 0)
                return ERR_PTR(ret);
 
-       epayload = kzalloc(sizeof(*epayload) + decrypted_datalen +
+       epayload = kzalloc(sizeof(*epayload) + payload_datalen +
                           datablob_len + HASH_SIZE + 1, GFP_KERNEL);
        if (!epayload)
                return ERR_PTR(-ENOMEM);
 
+       epayload->payload_datalen = payload_datalen;
        epayload->decrypted_datalen = decrypted_datalen;
        epayload->datablob_len = datablob_len;
        return epayload;
 }
 
 static int encrypted_key_decrypt(struct encrypted_key_payload *epayload,
-                                const char *hex_encoded_iv)
+                                const char *format, const char *hex_encoded_iv)
 {
        struct key *mkey;
        u8 derived_key[HASH_SIZE];
@@ -571,14 +689,14 @@ static int encrypted_key_decrypt(struct encrypted_key_payload *epayload,
        hex2bin(epayload->iv, hex_encoded_iv, ivsize);
        hex2bin(epayload->encrypted_data, hex_encoded_data, encrypted_datalen);
 
-       hmac = epayload->master_desc + epayload->datablob_len;
+       hmac = epayload->format + epayload->datablob_len;
        hex2bin(hmac, hex_encoded_data + (encrypted_datalen * 2), HASH_SIZE);
 
        mkey = request_master_key(epayload, &master_key, &master_keylen);
        if (IS_ERR(mkey))
                return PTR_ERR(mkey);
 
-       ret = datablob_hmac_verify(epayload, master_key, master_keylen);
+       ret = datablob_hmac_verify(epayload, format, master_key, master_keylen);
        if (ret < 0) {
                pr_err("encrypted_key: bad hmac (%d)\n", ret);
                goto out;
@@ -598,13 +716,28 @@ out:
 }
 
 static void __ekey_init(struct encrypted_key_payload *epayload,
-                       const char *master_desc, const char *datalen)
+                       const char *format, const char *master_desc,
+                       const char *datalen)
 {
-       epayload->master_desc = epayload->decrypted_data
-           + epayload->decrypted_datalen;
+       unsigned int format_len;
+
+       format_len = (!format) ? strlen(key_format_default) : strlen(format);
+       epayload->format = epayload->payload_data + epayload->payload_datalen;
+       epayload->master_desc = epayload->format + format_len + 1;
        epayload->datalen = epayload->master_desc + strlen(master_desc) + 1;
        epayload->iv = epayload->datalen + strlen(datalen) + 1;
        epayload->encrypted_data = epayload->iv + ivsize + 1;
+       epayload->decrypted_data = epayload->payload_data;
+
+       if (!format)
+               memcpy(epayload->format, key_format_default, format_len);
+       else {
+               if (!strcmp(format, key_format_ecryptfs))
+                       epayload->decrypted_data =
+                               ecryptfs_get_auth_tok_key((struct ecryptfs_auth_tok *)epayload->payload_data);
+
+               memcpy(epayload->format, format, format_len);
+       }
 
        memcpy(epayload->master_desc, master_desc, strlen(master_desc));
        memcpy(epayload->datalen, datalen, strlen(datalen));
@@ -617,19 +750,29 @@ static void __ekey_init(struct encrypted_key_payload *epayload,
  * itself.  For an old key, decrypt the hex encoded data.
  */
 static int encrypted_init(struct encrypted_key_payload *epayload,
+                         const char *key_desc, const char *format,
                          const char *master_desc, const char *datalen,
                          const char *hex_encoded_iv)
 {
        int ret = 0;
 
-       __ekey_init(epayload, master_desc, datalen);
+       if (format && !strcmp(format, key_format_ecryptfs)) {
+               ret = valid_ecryptfs_desc(key_desc);
+               if (ret < 0)
+                       return ret;
+
+               ecryptfs_fill_auth_tok((struct ecryptfs_auth_tok *)epayload->payload_data,
+                                      key_desc);
+       }
+
+       __ekey_init(epayload, format, master_desc, datalen);
        if (!hex_encoded_iv) {
                get_random_bytes(epayload->iv, ivsize);
 
                get_random_bytes(epayload->decrypted_data,
                                 epayload->decrypted_datalen);
        } else
-               ret = encrypted_key_decrypt(epayload, hex_encoded_iv);
+               ret = encrypted_key_decrypt(epayload, format, hex_encoded_iv);
        return ret;
 }
 
@@ -646,6 +789,7 @@ static int encrypted_instantiate(struct key *key, const void *data,
 {
        struct encrypted_key_payload *epayload = NULL;
        char *datablob = NULL;
+       const char *format = NULL;
        char *master_desc = NULL;
        char *decrypted_datalen = NULL;
        char *hex_encoded_iv = NULL;
@@ -659,18 +803,19 @@ static int encrypted_instantiate(struct key *key, const void *data,
                return -ENOMEM;
        datablob[datalen] = 0;
        memcpy(datablob, data, datalen);
-       ret = datablob_parse(datablob, &master_desc, &decrypted_datalen,
-                            &hex_encoded_iv);
+       ret = datablob_parse(datablob, &format, &master_desc,
+                            &decrypted_datalen, &hex_encoded_iv);
        if (ret < 0)
                goto out;
 
-       epayload = encrypted_key_alloc(key, master_desc, decrypted_datalen);
+       epayload = encrypted_key_alloc(key, format, master_desc,
+                                      decrypted_datalen);
        if (IS_ERR(epayload)) {
                ret = PTR_ERR(epayload);
                goto out;
        }
-       ret = encrypted_init(epayload, master_desc, decrypted_datalen,
-                            hex_encoded_iv);
+       ret = encrypted_init(epayload, key->description, format, master_desc,
+                            decrypted_datalen, hex_encoded_iv);
        if (ret < 0) {
                kfree(epayload);
                goto out;
@@ -706,6 +851,7 @@ static int encrypted_update(struct key *key, const void *data, size_t datalen)
        struct encrypted_key_payload *new_epayload;
        char *buf;
        char *new_master_desc = NULL;
+       const char *format = NULL;
        int ret = 0;
 
        if (datalen <= 0 || datalen > 32767 || !data)
@@ -717,7 +863,7 @@ static int encrypted_update(struct key *key, const void *data, size_t datalen)
 
        buf[datalen] = 0;
        memcpy(buf, data, datalen);
-       ret = datablob_parse(buf, &new_master_desc, NULL, NULL);
+       ret = datablob_parse(buf, &format, &new_master_desc, NULL, NULL);
        if (ret < 0)
                goto out;
 
@@ -725,18 +871,19 @@ static int encrypted_update(struct key *key, const void *data, size_t datalen)
        if (ret < 0)
                goto out;
 
-       new_epayload = encrypted_key_alloc(key, new_master_desc,
-                                          epayload->datalen);
+       new_epayload = encrypted_key_alloc(key, epayload->format,
+                                          new_master_desc, epayload->datalen);
        if (IS_ERR(new_epayload)) {
                ret = PTR_ERR(new_epayload);
                goto out;
        }
 
-       __ekey_init(new_epayload, new_master_desc, epayload->datalen);
+       __ekey_init(new_epayload, epayload->format, new_master_desc,
+                   epayload->datalen);
 
        memcpy(new_epayload->iv, epayload->iv, ivsize);
-       memcpy(new_epayload->decrypted_data, epayload->decrypted_data,
-              epayload->decrypted_datalen);
+       memcpy(new_epayload->payload_data, epayload->payload_data,
+              epayload->payload_datalen);
 
        rcu_assign_pointer(key->payload.data, new_epayload);
        call_rcu(&epayload->rcu, encrypted_rcu_free);
index 6cff375..60d4e3f 100644 (file)
@@ -251,6 +251,8 @@ struct key *key_get_instantiation_authkey(key_serial_t target_id)
 
        if (IS_ERR(authkey_ref)) {
                authkey = ERR_CAST(authkey_ref);
+               if (authkey == ERR_PTR(-EAGAIN))
+                       authkey = ERR_PTR(-ENOKEY);
                goto error;
        }
 
index c8f3857..7c7f8c1 100644 (file)
@@ -9,3 +9,64 @@ config SECURITY_TOMOYO
          Required userspace tools and further information may be
          found at <http://tomoyo.sourceforge.jp/>.
          If you are unsure how to answer this question, answer N.
+
+config SECURITY_TOMOYO_MAX_ACCEPT_ENTRY
+       int "Default maximal count for learning mode"
+       default 2048
+       range 0 2147483647
+       depends on SECURITY_TOMOYO
+       help
+         This is the default value for maximal ACL entries
+         that are automatically appended into policy at "learning mode".
+         Some programs access thousands of objects, so running
+         such programs in "learning mode" dulls the system response
+         and consumes much memory.
+         This is the safeguard for such programs.
+
+config SECURITY_TOMOYO_MAX_AUDIT_LOG
+       int "Default maximal count for audit log"
+       default 1024
+       range 0 2147483647
+       depends on SECURITY_TOMOYO
+       help
+         This is the default value for maximal entries for
+         audit logs that the kernel can hold on memory.
+         You can read the log via /sys/kernel/security/tomoyo/audit.
+         If you don't need audit logs, you may set this value to 0.
+
+config SECURITY_TOMOYO_OMIT_USERSPACE_LOADER
+       bool "Activate without calling userspace policy loader."
+       default n
+       depends on SECURITY_TOMOYO
+       ---help---
+         Say Y here if you want to activate access control as soon as the
+         built-in policy has been loaded. This option is useful for systems where
+         operations which can lead to the hijacking of the boot sequence are
+         needed before loading the policy. For example, you can activate
+         immediately after loading the fixed part of policy which will allow
+         only operations needed for mounting a partition which contains the
+         variant part of policy and verifying (e.g. running GPG check) and
+         loading the variant part of policy. Since you can start using
+         enforcing mode from the beginning, you can reduce the possibility of
+         hijacking the boot sequence.
+
+config SECURITY_TOMOYO_POLICY_LOADER
+       string "Location of userspace policy loader"
+       default "/sbin/tomoyo-init"
+       depends on SECURITY_TOMOYO
+       depends on !SECURITY_TOMOYO_OMIT_USERSPACE_LOADER
+       ---help---
+         This is the default pathname of policy loader which is called before
+         activation. You can override this setting via TOMOYO_loader= kernel
+         command line option.
+
+config SECURITY_TOMOYO_ACTIVATION_TRIGGER
+       string "Trigger for calling userspace policy loader"
+       default "/sbin/init"
+       depends on SECURITY_TOMOYO
+       depends on !SECURITY_TOMOYO_OMIT_USERSPACE_LOADER
+       ---help---
+         This is the default pathname of activation trigger.
+         You can override this setting via TOMOYO_trigger= kernel command line
+         option. For example, if you pass init=/bin/systemd option, you may
+         want to also pass TOMOYO_trigger=/bin/systemd option.
index 91640e9..95278b7 100644 (file)
@@ -1 +1,48 @@
-obj-y = common.o domain.o file.o gc.o group.o load_policy.o memory.o mount.o realpath.o securityfs_if.o tomoyo.o util.o
+obj-y = audit.o common.o condition.o domain.o file.o gc.o group.o load_policy.o memory.o mount.o realpath.o securityfs_if.o tomoyo.o util.o
+
+$(obj)/policy/profile.conf:
+       @mkdir -p $(obj)/policy/
+       @echo Creating an empty policy/profile.conf
+       @touch $@
+
+$(obj)/policy/exception_policy.conf:
+       @mkdir -p $(obj)/policy/
+       @echo Creating a default policy/exception_policy.conf
+       @echo initialize_domain /sbin/modprobe from any >> $@
+       @echo initialize_domain /sbin/hotplug from any >> $@
+
+$(obj)/policy/domain_policy.conf:
+       @mkdir -p $(obj)/policy/
+       @echo Creating an empty policy/domain_policy.conf
+       @touch $@
+
+$(obj)/policy/manager.conf:
+       @mkdir -p $(obj)/policy/
+       @echo Creating an empty policy/manager.conf
+       @touch $@
+
+$(obj)/policy/stat.conf:
+       @mkdir -p $(obj)/policy/
+       @echo Creating an empty policy/stat.conf
+       @touch $@
+
+$(obj)/builtin-policy.h: $(obj)/policy/profile.conf $(obj)/policy/exception_policy.conf $(obj)/policy/domain_policy.conf $(obj)/policy/manager.conf $(obj)/policy/stat.conf
+       @echo Generating built-in policy for TOMOYO 2.4.x.
+       @echo "static char tomoyo_builtin_profile[] __initdata =" > $@.tmp
+       @sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/"\1\\n"/' < $(obj)/policy/profile.conf >> $@.tmp
+       @echo "\"\";" >> $@.tmp
+       @echo "static char tomoyo_builtin_exception_policy[] __initdata =" >> $@.tmp
+       @sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/"\1\\n"/' < $(obj)/policy/exception_policy.conf >> $@.tmp
+       @echo "\"\";" >> $@.tmp
+       @echo "static char tomoyo_builtin_domain_policy[] __initdata =" >> $@.tmp
+       @sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/"\1\\n"/' < $(obj)/policy/domain_policy.conf >> $@.tmp
+       @echo "\"\";" >> $@.tmp
+       @echo "static char tomoyo_builtin_manager[] __initdata =" >> $@.tmp
+       @sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/"\1\\n"/' < $(obj)/policy/manager.conf >> $@.tmp
+       @echo "\"\";" >> $@.tmp
+       @echo "static char tomoyo_builtin_stat[] __initdata =" >> $@.tmp
+       @sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/"\1\\n"/' < $(obj)/policy/stat.conf >> $@.tmp
+       @echo "\"\";" >> $@.tmp
+       @mv $@.tmp $@
+
+$(obj)/common.o: $(obj)/builtin-policy.h
diff --git a/security/tomoyo/audit.c b/security/tomoyo/audit.c
new file mode 100644 (file)
index 0000000..5dbb1f7
--- /dev/null
@@ -0,0 +1,456 @@
+/*
+ * security/tomoyo/audit.c
+ *
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
+ */
+
+#include "common.h"
+#include <linux/slab.h>
+
+/**
+ * tomoyo_print_bprm - Print "struct linux_binprm" for auditing.
+ *
+ * @bprm: Pointer to "struct linux_binprm".
+ * @dump: Pointer to "struct tomoyo_page_dump".
+ *
+ * Returns the contents of @bprm on success, NULL otherwise.
+ *
+ * This function uses kzalloc(), so caller must kfree() if this function
+ * didn't return NULL.
+ */
+static char *tomoyo_print_bprm(struct linux_binprm *bprm,
+                              struct tomoyo_page_dump *dump)
+{
+       static const int tomoyo_buffer_len = 4096 * 2;
+       char *buffer = kzalloc(tomoyo_buffer_len, GFP_NOFS);
+       char *cp;
+       char *last_start;
+       int len;
+       unsigned long pos = bprm->p;
+       int offset = pos % PAGE_SIZE;
+       int argv_count = bprm->argc;
+       int envp_count = bprm->envc;
+       bool truncated = false;
+       if (!buffer)
+               return NULL;
+       len = snprintf(buffer, tomoyo_buffer_len - 1, "argv[]={ ");
+       cp = buffer + len;
+       if (!argv_count) {
+               memmove(cp, "} envp[]={ ", 11);
+               cp += 11;
+       }
+       last_start = cp;
+       while (argv_count || envp_count) {
+               if (!tomoyo_dump_page(bprm, pos, dump))
+                       goto out;
+               pos += PAGE_SIZE - offset;
+               /* Read. */
+               while (offset < PAGE_SIZE) {
+                       const char *kaddr = dump->data;
+                       const unsigned char c = kaddr[offset++];
+                       if (cp == last_start)
+                               *cp++ = '"';
+                       if (cp >= buffer + tomoyo_buffer_len - 32) {
+                               /* Reserve some room for "..." string. */
+                               truncated = true;
+                       } else if (c == '\\') {
+                               *cp++ = '\\';
+                               *cp++ = '\\';
+                       } else if (c > ' ' && c < 127) {
+                               *cp++ = c;
+                       } else if (!c) {
+                               *cp++ = '"';
+                               *cp++ = ' ';
+                               last_start = cp;
+                       } else {
+                               *cp++ = '\\';
+                               *cp++ = (c >> 6) + '0';
+                               *cp++ = ((c >> 3) & 7) + '0';
+                               *cp++ = (c & 7) + '0';
+                       }
+                       if (c)
+                               continue;
+                       if (argv_count) {
+                               if (--argv_count == 0) {
+                                       if (truncated) {
+                                               cp = last_start;
+                                               memmove(cp, "... ", 4);
+                                               cp += 4;
+                                       }
+                                       memmove(cp, "} envp[]={ ", 11);
+                                       cp += 11;
+                                       last_start = cp;
+                                       truncated = false;
+                               }
+                       } else if (envp_count) {
+                               if (--envp_count == 0) {
+                                       if (truncated) {
+                                               cp = last_start;
+                                               memmove(cp, "... ", 4);
+                                               cp += 4;
+                                       }
+                               }
+                       }
+                       if (!argv_count && !envp_count)
+                               break;
+               }
+               offset = 0;
+       }
+       *cp++ = '}';
+       *cp = '\0';
+       return buffer;
+out:
+       snprintf(buffer, tomoyo_buffer_len - 1,
+                "argv[]={ ... } envp[]= { ... }");
+       return buffer;
+}
+
+/**
+ * tomoyo_filetype - Get string representation of file type.
+ *
+ * @mode: Mode value for stat().
+ *
+ * Returns file type string.
+ */
+static inline const char *tomoyo_filetype(const mode_t mode)
+{
+       switch (mode & S_IFMT) {
+       case S_IFREG:
+       case 0:
+               return tomoyo_condition_keyword[TOMOYO_TYPE_IS_FILE];
+       case S_IFDIR:
+               return tomoyo_condition_keyword[TOMOYO_TYPE_IS_DIRECTORY];
+       case S_IFLNK:
+               return tomoyo_condition_keyword[TOMOYO_TYPE_IS_SYMLINK];
+       case S_IFIFO:
+               return tomoyo_condition_keyword[TOMOYO_TYPE_IS_FIFO];
+       case S_IFSOCK:
+               return tomoyo_condition_keyword[TOMOYO_TYPE_IS_SOCKET];
+       case S_IFBLK:
+               return tomoyo_condition_keyword[TOMOYO_TYPE_IS_BLOCK_DEV];
+       case S_IFCHR:
+               return tomoyo_condition_keyword[TOMOYO_TYPE_IS_CHAR_DEV];
+       }
+       return "unknown"; /* This should not happen. */
+}
+
+/**
+ * tomoyo_print_header - Get header line of audit log.
+ *
+ * @r: Pointer to "struct tomoyo_request_info".
+ *
+ * Returns string representation.
+ *
+ * This function uses kmalloc(), so caller must kfree() if this function
+ * didn't return NULL.
+ */
+static char *tomoyo_print_header(struct tomoyo_request_info *r)
+{
+       struct tomoyo_time stamp;
+       const pid_t gpid = task_pid_nr(current);
+       struct tomoyo_obj_info *obj = r->obj;
+       static const int tomoyo_buffer_len = 4096;
+       char *buffer = kmalloc(tomoyo_buffer_len, GFP_NOFS);
+       int pos;
+       u8 i;
+       if (!buffer)
+               return NULL;
+       {
+               struct timeval tv;
+               do_gettimeofday(&tv);
+               tomoyo_convert_time(tv.tv_sec, &stamp);
+       }
+       pos = snprintf(buffer, tomoyo_buffer_len - 1,
+                      "#%04u/%02u/%02u %02u:%02u:%02u# profile=%u mode=%s "
+                      "granted=%s (global-pid=%u) task={ pid=%u ppid=%u "
+                      "uid=%u gid=%u euid=%u egid=%u suid=%u sgid=%u "
+                      "fsuid=%u fsgid=%u }", stamp.year, stamp.month,
+                      stamp.day, stamp.hour, stamp.min, stamp.sec, r->profile,
+                      tomoyo_mode[r->mode], tomoyo_yesno(r->granted), gpid,
+                      tomoyo_sys_getpid(), tomoyo_sys_getppid(),
+                      current_uid(), current_gid(), current_euid(),
+                      current_egid(), current_suid(), current_sgid(),
+                      current_fsuid(), current_fsgid());
+       if (!obj)
+               goto no_obj_info;
+       if (!obj->validate_done) {
+               tomoyo_get_attributes(obj);
+               obj->validate_done = true;
+       }
+       for (i = 0; i < TOMOYO_MAX_PATH_STAT; i++) {
+               struct tomoyo_mini_stat *stat;
+               unsigned int dev;
+               mode_t mode;
+               if (!obj->stat_valid[i])
+                       continue;
+               stat = &obj->stat[i];
+               dev = stat->dev;
+               mode = stat->mode;
+               if (i & 1) {
+                       pos += snprintf(buffer + pos,
+                                       tomoyo_buffer_len - 1 - pos,
+                                       " path%u.parent={ uid=%u gid=%u "
+                                       "ino=%lu perm=0%o }", (i >> 1) + 1,
+                                       stat->uid, stat->gid, (unsigned long)
+                                       stat->ino, stat->mode & S_IALLUGO);
+                       continue;
+               }
+               pos += snprintf(buffer + pos, tomoyo_buffer_len - 1 - pos,
+                               " path%u={ uid=%u gid=%u ino=%lu major=%u"
+                               " minor=%u perm=0%o type=%s", (i >> 1) + 1,
+                               stat->uid, stat->gid, (unsigned long)
+                               stat->ino, MAJOR(dev), MINOR(dev),
+                               mode & S_IALLUGO, tomoyo_filetype(mode));
+               if (S_ISCHR(mode) || S_ISBLK(mode)) {
+                       dev = stat->rdev;
+                       pos += snprintf(buffer + pos,
+                                       tomoyo_buffer_len - 1 - pos,
+                                       " dev_major=%u dev_minor=%u",
+                                       MAJOR(dev), MINOR(dev));
+               }
+               pos += snprintf(buffer + pos, tomoyo_buffer_len - 1 - pos,
+                               " }");
+       }
+no_obj_info:
+       if (pos < tomoyo_buffer_len - 1)
+               return buffer;
+       kfree(buffer);
+       return NULL;
+}
+
+/**
+ * tomoyo_init_log - Allocate buffer for audit logs.
+ *
+ * @r:    Pointer to "struct tomoyo_request_info".
+ * @len:  Buffer size needed for @fmt and @args.
+ * @fmt:  The printf()'s format string.
+ * @args: va_list structure for @fmt.
+ *
+ * Returns pointer to allocated memory.
+ *
+ * This function uses kzalloc(), so caller must kfree() if this function
+ * didn't return NULL.
+ */
+char *tomoyo_init_log(struct tomoyo_request_info *r, int len, const char *fmt,
+                     va_list args)
+{
+       char *buf = NULL;
+       char *bprm_info = NULL;
+       const char *header = NULL;
+       char *realpath = NULL;
+       const char *symlink = NULL;
+       int pos;
+       const char *domainname = r->domain->domainname->name;
+       header = tomoyo_print_header(r);
+       if (!header)
+               return NULL;
+       /* +10 is for '\n' etc. and '\0'. */
+       len += strlen(domainname) + strlen(header) + 10;
+       if (r->ee) {
+               struct file *file = r->ee->bprm->file;
+               realpath = tomoyo_realpath_from_path(&file->f_path);
+               bprm_info = tomoyo_print_bprm(r->ee->bprm, &r->ee->dump);
+               if (!realpath || !bprm_info)
+                       goto out;
+               /* +80 is for " exec={ realpath=\"%s\" argc=%d envc=%d %s }" */
+               len += strlen(realpath) + 80 + strlen(bprm_info);
+       } else if (r->obj && r->obj->symlink_target) {
+               symlink = r->obj->symlink_target->name;
+               /* +18 is for " symlink.target=\"%s\"" */
+               len += 18 + strlen(symlink);
+       }
+       len = tomoyo_round2(len);
+       buf = kzalloc(len, GFP_NOFS);
+       if (!buf)
+               goto out;
+       len--;
+       pos = snprintf(buf, len, "%s", header);
+       if (realpath) {
+               struct linux_binprm *bprm = r->ee->bprm;
+               pos += snprintf(buf + pos, len - pos,
+                               " exec={ realpath=\"%s\" argc=%d envc=%d %s }",
+                               realpath, bprm->argc, bprm->envc, bprm_info);
+       } else if (symlink)
+               pos += snprintf(buf + pos, len - pos, " symlink.target=\"%s\"",
+                               symlink);
+       pos += snprintf(buf + pos, len - pos, "\n%s\n", domainname);
+       vsnprintf(buf + pos, len - pos, fmt, args);
+out:
+       kfree(realpath);
+       kfree(bprm_info);
+       kfree(header);
+       return buf;
+}
+
+/* Wait queue for /sys/kernel/security/tomoyo/audit. */
+static DECLARE_WAIT_QUEUE_HEAD(tomoyo_log_wait);
+
+/* Structure for audit log. */
+struct tomoyo_log {
+       struct list_head list;
+       char *log;
+       int size;
+};
+
+/* The list for "struct tomoyo_log". */
+static LIST_HEAD(tomoyo_log);
+
+/* Lock for "struct list_head tomoyo_log". */
+static DEFINE_SPINLOCK(tomoyo_log_lock);
+
+/* Length of "struct list_head tomoyo_log". */
+static unsigned int tomoyo_log_count;
+
+/**
+ * tomoyo_get_audit - Get audit mode.
+ *
+ * @ns:          Pointer to "struct tomoyo_policy_namespace".
+ * @profile:     Profile number.
+ * @index:       Index number of functionality.
+ * @is_granted:  True if granted log, false otherwise.
+ *
+ * Returns true if this request should be audited, false otherwise.
+ */
+static bool tomoyo_get_audit(const struct tomoyo_policy_namespace *ns,
+                            const u8 profile, const u8 index,
+                            const bool is_granted)
+{
+       u8 mode;
+       const u8 category = tomoyo_index2category[index] +
+               TOMOYO_MAX_MAC_INDEX;
+       struct tomoyo_profile *p;
+       if (!tomoyo_policy_loaded)
+               return false;
+       p = tomoyo_profile(ns, profile);
+       if (tomoyo_log_count >= p->pref[TOMOYO_PREF_MAX_AUDIT_LOG])
+               return false;
+       mode = p->config[index];
+       if (mode == TOMOYO_CONFIG_USE_DEFAULT)
+               mode = p->config[category];
+       if (mode == TOMOYO_CONFIG_USE_DEFAULT)
+               mode = p->default_config;
+       if (is_granted)
+               return mode & TOMOYO_CONFIG_WANT_GRANT_LOG;
+       return mode & TOMOYO_CONFIG_WANT_REJECT_LOG;
+}
+
+/**
+ * tomoyo_write_log2 - Write an audit log.
+ *
+ * @r:    Pointer to "struct tomoyo_request_info".
+ * @len:  Buffer size needed for @fmt and @args.
+ * @fmt:  The printf()'s format string.
+ * @args: va_list structure for @fmt.
+ *
+ * Returns nothing.
+ */
+void tomoyo_write_log2(struct tomoyo_request_info *r, int len, const char *fmt,
+                      va_list args)
+{
+       char *buf;
+       struct tomoyo_log *entry;
+       bool quota_exceeded = false;
+       if (!tomoyo_get_audit(r->domain->ns, r->profile, r->type, r->granted))
+               goto out;
+       buf = tomoyo_init_log(r, len, fmt, args);
+       if (!buf)
+               goto out;
+       entry = kzalloc(sizeof(*entry), GFP_NOFS);
+       if (!entry) {
+               kfree(buf);
+               goto out;
+       }
+       entry->log = buf;
+       len = tomoyo_round2(strlen(buf) + 1);
+       /*
+        * The entry->size is used for memory quota checks.
+        * Don't go beyond strlen(entry->log).
+        */
+       entry->size = len + tomoyo_round2(sizeof(*entry));
+       spin_lock(&tomoyo_log_lock);
+       if (tomoyo_memory_quota[TOMOYO_MEMORY_AUDIT] &&
+           tomoyo_memory_used[TOMOYO_MEMORY_AUDIT] + entry->size >=
+           tomoyo_memory_quota[TOMOYO_MEMORY_AUDIT]) {
+               quota_exceeded = true;
+       } else {
+               tomoyo_memory_used[TOMOYO_MEMORY_AUDIT] += entry->size;
+               list_add_tail(&entry->list, &tomoyo_log);
+               tomoyo_log_count++;
+       }
+       spin_unlock(&tomoyo_log_lock);
+       if (quota_exceeded) {
+               kfree(buf);
+               kfree(entry);
+               goto out;
+       }
+       wake_up(&tomoyo_log_wait);
+out:
+       return;
+}
+
+/**
+ * tomoyo_write_log - Write an audit log.
+ *
+ * @r:   Pointer to "struct tomoyo_request_info".
+ * @fmt: The printf()'s format string, followed by parameters.
+ *
+ * Returns nothing.
+ */
+void tomoyo_write_log(struct tomoyo_request_info *r, const char *fmt, ...)
+{
+       va_list args;
+       int len;
+       va_start(args, fmt);
+       len = vsnprintf((char *) &len, 1, fmt, args) + 1;
+       va_end(args);
+       va_start(args, fmt);
+       tomoyo_write_log2(r, len, fmt, args);
+       va_end(args);
+}
+
+/**
+ * tomoyo_read_log - Read an audit log.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns nothing.
+ */
+void tomoyo_read_log(struct tomoyo_io_buffer *head)
+{
+       struct tomoyo_log *ptr = NULL;
+       if (head->r.w_pos)
+               return;
+       kfree(head->read_buf);
+       head->read_buf = NULL;
+       spin_lock(&tomoyo_log_lock);
+       if (!list_empty(&tomoyo_log)) {
+               ptr = list_entry(tomoyo_log.next, typeof(*ptr), list);
+               list_del(&ptr->list);
+               tomoyo_log_count--;
+               tomoyo_memory_used[TOMOYO_MEMORY_AUDIT] -= ptr->size;
+       }
+       spin_unlock(&tomoyo_log_lock);
+       if (ptr) {
+               head->read_buf = ptr->log;
+               head->r.w[head->r.w_pos++] = head->read_buf;
+               kfree(ptr);
+       }
+}
+
+/**
+ * tomoyo_poll_log - Wait for an audit log.
+ *
+ * @file: Pointer to "struct file".
+ * @wait: Pointer to "poll_table".
+ *
+ * Returns POLLIN | POLLRDNORM when ready to read an audit log.
+ */
+int tomoyo_poll_log(struct file *file, poll_table *wait)
+{
+       if (tomoyo_log_count)
+               return POLLIN | POLLRDNORM;
+       poll_wait(file, &tomoyo_log_wait, wait);
+       if (tomoyo_log_count)
+               return POLLIN | POLLRDNORM;
+       return 0;
+}
index a0d09e5..c8439cf 100644 (file)
@@ -1,9 +1,7 @@
 /*
  * security/tomoyo/common.c
  *
- * Common functions for TOMOYO.
- *
- * Copyright (C) 2005-2010  NTT DATA CORPORATION
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
  */
 
 #include <linux/uaccess.h>
 #include <linux/security.h>
 #include "common.h"
 
-static struct tomoyo_profile tomoyo_default_profile = {
-       .learning = &tomoyo_default_profile.preference,
-       .permissive = &tomoyo_default_profile.preference,
-       .enforcing = &tomoyo_default_profile.preference,
-       .preference.enforcing_verbose = true,
-       .preference.learning_max_entry = 2048,
-       .preference.learning_verbose = false,
-       .preference.permissive_verbose = true
+/* String table for operation mode. */
+const char * const tomoyo_mode[TOMOYO_CONFIG_MAX_MODE] = {
+       [TOMOYO_CONFIG_DISABLED]   = "disabled",
+       [TOMOYO_CONFIG_LEARNING]   = "learning",
+       [TOMOYO_CONFIG_PERMISSIVE] = "permissive",
+       [TOMOYO_CONFIG_ENFORCING]  = "enforcing"
 };
 
-/* Profile version. Currently only 20090903 is defined. */
-static unsigned int tomoyo_profile_version;
+/* String table for /sys/kernel/security/tomoyo/profile */
+const char * const tomoyo_mac_keywords[TOMOYO_MAX_MAC_INDEX
+                                      + TOMOYO_MAX_MAC_CATEGORY_INDEX] = {
+       [TOMOYO_MAC_FILE_EXECUTE]    = "execute",
+       [TOMOYO_MAC_FILE_OPEN]       = "open",
+       [TOMOYO_MAC_FILE_CREATE]     = "create",
+       [TOMOYO_MAC_FILE_UNLINK]     = "unlink",
+       [TOMOYO_MAC_FILE_GETATTR]    = "getattr",
+       [TOMOYO_MAC_FILE_MKDIR]      = "mkdir",
+       [TOMOYO_MAC_FILE_RMDIR]      = "rmdir",
+       [TOMOYO_MAC_FILE_MKFIFO]     = "mkfifo",
+       [TOMOYO_MAC_FILE_MKSOCK]     = "mksock",
+       [TOMOYO_MAC_FILE_TRUNCATE]   = "truncate",
+       [TOMOYO_MAC_FILE_SYMLINK]    = "symlink",
+       [TOMOYO_MAC_FILE_MKBLOCK]    = "mkblock",
+       [TOMOYO_MAC_FILE_MKCHAR]     = "mkchar",
+       [TOMOYO_MAC_FILE_LINK]       = "link",
+       [TOMOYO_MAC_FILE_RENAME]     = "rename",
+       [TOMOYO_MAC_FILE_CHMOD]      = "chmod",
+       [TOMOYO_MAC_FILE_CHOWN]      = "chown",
+       [TOMOYO_MAC_FILE_CHGRP]      = "chgrp",
+       [TOMOYO_MAC_FILE_IOCTL]      = "ioctl",
+       [TOMOYO_MAC_FILE_CHROOT]     = "chroot",
+       [TOMOYO_MAC_FILE_MOUNT]      = "mount",
+       [TOMOYO_MAC_FILE_UMOUNT]     = "unmount",
+       [TOMOYO_MAC_FILE_PIVOT_ROOT] = "pivot_root",
+       [TOMOYO_MAX_MAC_INDEX + TOMOYO_MAC_CATEGORY_FILE] = "file",
+};
 
-/* Profile table. Memory is allocated as needed. */
-static struct tomoyo_profile *tomoyo_profile_ptr[TOMOYO_MAX_PROFILES];
+/* String table for conditions. */
+const char * const tomoyo_condition_keyword[TOMOYO_MAX_CONDITION_KEYWORD] = {
+       [TOMOYO_TASK_UID]             = "task.uid",
+       [TOMOYO_TASK_EUID]            = "task.euid",
+       [TOMOYO_TASK_SUID]            = "task.suid",
+       [TOMOYO_TASK_FSUID]           = "task.fsuid",
+       [TOMOYO_TASK_GID]             = "task.gid",
+       [TOMOYO_TASK_EGID]            = "task.egid",
+       [TOMOYO_TASK_SGID]            = "task.sgid",
+       [TOMOYO_TASK_FSGID]           = "task.fsgid",
+       [TOMOYO_TASK_PID]             = "task.pid",
+       [TOMOYO_TASK_PPID]            = "task.ppid",
+       [TOMOYO_EXEC_ARGC]            = "exec.argc",
+       [TOMOYO_EXEC_ENVC]            = "exec.envc",
+       [TOMOYO_TYPE_IS_SOCKET]       = "socket",
+       [TOMOYO_TYPE_IS_SYMLINK]      = "symlink",
+       [TOMOYO_TYPE_IS_FILE]         = "file",
+       [TOMOYO_TYPE_IS_BLOCK_DEV]    = "block",
+       [TOMOYO_TYPE_IS_DIRECTORY]    = "directory",
+       [TOMOYO_TYPE_IS_CHAR_DEV]     = "char",
+       [TOMOYO_TYPE_IS_FIFO]         = "fifo",
+       [TOMOYO_MODE_SETUID]          = "setuid",
+       [TOMOYO_MODE_SETGID]          = "setgid",
+       [TOMOYO_MODE_STICKY]          = "sticky",
+       [TOMOYO_MODE_OWNER_READ]      = "owner_read",
+       [TOMOYO_MODE_OWNER_WRITE]     = "owner_write",
+       [TOMOYO_MODE_OWNER_EXECUTE]   = "owner_execute",
+       [TOMOYO_MODE_GROUP_READ]      = "group_read",
+       [TOMOYO_MODE_GROUP_WRITE]     = "group_write",
+       [TOMOYO_MODE_GROUP_EXECUTE]   = "group_execute",
+       [TOMOYO_MODE_OTHERS_READ]     = "others_read",
+       [TOMOYO_MODE_OTHERS_WRITE]    = "others_write",
+       [TOMOYO_MODE_OTHERS_EXECUTE]  = "others_execute",
+       [TOMOYO_EXEC_REALPATH]        = "exec.realpath",
+       [TOMOYO_SYMLINK_TARGET]       = "symlink.target",
+       [TOMOYO_PATH1_UID]            = "path1.uid",
+       [TOMOYO_PATH1_GID]            = "path1.gid",
+       [TOMOYO_PATH1_INO]            = "path1.ino",
+       [TOMOYO_PATH1_MAJOR]          = "path1.major",
+       [TOMOYO_PATH1_MINOR]          = "path1.minor",
+       [TOMOYO_PATH1_PERM]           = "path1.perm",
+       [TOMOYO_PATH1_TYPE]           = "path1.type",
+       [TOMOYO_PATH1_DEV_MAJOR]      = "path1.dev_major",
+       [TOMOYO_PATH1_DEV_MINOR]      = "path1.dev_minor",
+       [TOMOYO_PATH2_UID]            = "path2.uid",
+       [TOMOYO_PATH2_GID]            = "path2.gid",
+       [TOMOYO_PATH2_INO]            = "path2.ino",
+       [TOMOYO_PATH2_MAJOR]          = "path2.major",
+       [TOMOYO_PATH2_MINOR]          = "path2.minor",
+       [TOMOYO_PATH2_PERM]           = "path2.perm",
+       [TOMOYO_PATH2_TYPE]           = "path2.type",
+       [TOMOYO_PATH2_DEV_MAJOR]      = "path2.dev_major",
+       [TOMOYO_PATH2_DEV_MINOR]      = "path2.dev_minor",
+       [TOMOYO_PATH1_PARENT_UID]     = "path1.parent.uid",
+       [TOMOYO_PATH1_PARENT_GID]     = "path1.parent.gid",
+       [TOMOYO_PATH1_PARENT_INO]     = "path1.parent.ino",
+       [TOMOYO_PATH1_PARENT_PERM]    = "path1.parent.perm",
+       [TOMOYO_PATH2_PARENT_UID]     = "path2.parent.uid",
+       [TOMOYO_PATH2_PARENT_GID]     = "path2.parent.gid",
+       [TOMOYO_PATH2_PARENT_INO]     = "path2.parent.ino",
+       [TOMOYO_PATH2_PARENT_PERM]    = "path2.parent.perm",
+};
 
-/* String table for functionality that takes 4 modes. */
-static const char *tomoyo_mode[4] = {
-       "disabled", "learning", "permissive", "enforcing"
+/* String table for PREFERENCE keyword. */
+static const char * const tomoyo_pref_keywords[TOMOYO_MAX_PREF] = {
+       [TOMOYO_PREF_MAX_AUDIT_LOG]      = "max_audit_log",
+       [TOMOYO_PREF_MAX_LEARNING_ENTRY] = "max_learning_entry",
 };
 
-/* String table for /sys/kernel/security/tomoyo/profile */
-static const char *tomoyo_mac_keywords[TOMOYO_MAX_MAC_INDEX
-                                      + TOMOYO_MAX_MAC_CATEGORY_INDEX] = {
-       [TOMOYO_MAC_FILE_EXECUTE]    = "file::execute",
-       [TOMOYO_MAC_FILE_OPEN]       = "file::open",
-       [TOMOYO_MAC_FILE_CREATE]     = "file::create",
-       [TOMOYO_MAC_FILE_UNLINK]     = "file::unlink",
-       [TOMOYO_MAC_FILE_MKDIR]      = "file::mkdir",
-       [TOMOYO_MAC_FILE_RMDIR]      = "file::rmdir",
-       [TOMOYO_MAC_FILE_MKFIFO]     = "file::mkfifo",
-       [TOMOYO_MAC_FILE_MKSOCK]     = "file::mksock",
-       [TOMOYO_MAC_FILE_TRUNCATE]   = "file::truncate",
-       [TOMOYO_MAC_FILE_SYMLINK]    = "file::symlink",
-       [TOMOYO_MAC_FILE_REWRITE]    = "file::rewrite",
-       [TOMOYO_MAC_FILE_MKBLOCK]    = "file::mkblock",
-       [TOMOYO_MAC_FILE_MKCHAR]     = "file::mkchar",
-       [TOMOYO_MAC_FILE_LINK]       = "file::link",
-       [TOMOYO_MAC_FILE_RENAME]     = "file::rename",
-       [TOMOYO_MAC_FILE_CHMOD]      = "file::chmod",
-       [TOMOYO_MAC_FILE_CHOWN]      = "file::chown",
-       [TOMOYO_MAC_FILE_CHGRP]      = "file::chgrp",
-       [TOMOYO_MAC_FILE_IOCTL]      = "file::ioctl",
-       [TOMOYO_MAC_FILE_CHROOT]     = "file::chroot",
-       [TOMOYO_MAC_FILE_MOUNT]      = "file::mount",
-       [TOMOYO_MAC_FILE_UMOUNT]     = "file::umount",
-       [TOMOYO_MAC_FILE_PIVOT_ROOT] = "file::pivot_root",
-       [TOMOYO_MAX_MAC_INDEX + TOMOYO_MAC_CATEGORY_FILE] = "file",
+/* String table for path operation. */
+const char * const tomoyo_path_keyword[TOMOYO_MAX_PATH_OPERATION] = {
+       [TOMOYO_TYPE_EXECUTE]    = "execute",
+       [TOMOYO_TYPE_READ]       = "read",
+       [TOMOYO_TYPE_WRITE]      = "write",
+       [TOMOYO_TYPE_APPEND]     = "append",
+       [TOMOYO_TYPE_UNLINK]     = "unlink",
+       [TOMOYO_TYPE_GETATTR]    = "getattr",
+       [TOMOYO_TYPE_RMDIR]      = "rmdir",
+       [TOMOYO_TYPE_TRUNCATE]   = "truncate",
+       [TOMOYO_TYPE_SYMLINK]    = "symlink",
+       [TOMOYO_TYPE_CHROOT]     = "chroot",
+       [TOMOYO_TYPE_UMOUNT]     = "unmount",
+};
+
+/* String table for categories. */
+static const char * const tomoyo_category_keywords
+[TOMOYO_MAX_MAC_CATEGORY_INDEX] = {
+       [TOMOYO_MAC_CATEGORY_FILE]       = "file",
 };
 
 /* Permit policy management by non-root user? */
@@ -71,11 +146,20 @@ static bool tomoyo_manage_by_non_root;
  *
  * @value: Bool value.
  */
-static const char *tomoyo_yesno(const unsigned int value)
+const char *tomoyo_yesno(const unsigned int value)
 {
        return value ? "yes" : "no";
 }
 
+/**
+ * tomoyo_addprintf - strncat()-like-snprintf().
+ *
+ * @buffer: Buffer to write to. Must be '\0'-terminated.
+ * @len:    Size of @buffer.
+ * @fmt:    The printf()'s format string, followed by parameters.
+ *
+ * Returns nothing.
+ */
 static void tomoyo_addprintf(char *buffer, int len, const char *fmt, ...)
 {
        va_list args;
@@ -96,7 +180,7 @@ static bool tomoyo_flush(struct tomoyo_io_buffer *head)
 {
        while (head->r.w_pos) {
                const char *w = head->r.w[0];
-               int len = strlen(w);
+               size_t len = strlen(w);
                if (len) {
                        if (len > head->read_user_buf_avail)
                                len = head->read_user_buf_avail;
@@ -111,7 +195,7 @@ static bool tomoyo_flush(struct tomoyo_io_buffer *head)
                head->r.w[0] = w;
                if (*w)
                        return false;
-               /* Add '\0' for query. */
+               /* Add '\0' for audit logs and query. */
                if (head->poll) {
                        if (!head->read_user_buf_avail ||
                            copy_to_user(head->read_user_buf, "", 1))
@@ -155,8 +239,8 @@ static void tomoyo_set_string(struct tomoyo_io_buffer *head, const char *string)
 void tomoyo_io_printf(struct tomoyo_io_buffer *head, const char *fmt, ...)
 {
        va_list args;
-       int len;
-       int pos = head->r.avail;
+       size_t len;
+       size_t pos = head->r.avail;
        int size = head->readbuf_size - pos;
        if (size <= 0)
                return;
@@ -171,11 +255,25 @@ void tomoyo_io_printf(struct tomoyo_io_buffer *head, const char *fmt, ...)
        tomoyo_set_string(head, head->read_buf + pos);
 }
 
+/**
+ * tomoyo_set_space - Put a space to "struct tomoyo_io_buffer" structure.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns nothing.
+ */
 static void tomoyo_set_space(struct tomoyo_io_buffer *head)
 {
        tomoyo_set_string(head, " ");
 }
 
+/**
+ * tomoyo_set_lf - Put a line feed to "struct tomoyo_io_buffer" structure.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns true if all data is flushed out, false otherwise.
+ */
 static bool tomoyo_set_lf(struct tomoyo_io_buffer *head)
 {
        tomoyo_set_string(head, "\n");
@@ -183,6 +281,62 @@ static bool tomoyo_set_lf(struct tomoyo_io_buffer *head)
 }
 
 /**
+ * tomoyo_set_slash - Put a slash to "struct tomoyo_io_buffer" structure.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns nothing.
+ */
+static void tomoyo_set_slash(struct tomoyo_io_buffer *head)
+{
+       tomoyo_set_string(head, "/");
+}
+
+/* List of namespaces. */
+LIST_HEAD(tomoyo_namespace_list);
+/* True if namespace other than tomoyo_kernel_namespace is defined. */
+static bool tomoyo_namespace_enabled;
+
+/**
+ * tomoyo_init_policy_namespace - Initialize namespace.
+ *
+ * @ns: Pointer to "struct tomoyo_policy_namespace".
+ *
+ * Returns nothing.
+ */
+void tomoyo_init_policy_namespace(struct tomoyo_policy_namespace *ns)
+{
+       unsigned int idx;
+       for (idx = 0; idx < TOMOYO_MAX_ACL_GROUPS; idx++)
+               INIT_LIST_HEAD(&ns->acl_group[idx]);
+       for (idx = 0; idx < TOMOYO_MAX_GROUP; idx++)
+               INIT_LIST_HEAD(&ns->group_list[idx]);
+       for (idx = 0; idx < TOMOYO_MAX_POLICY; idx++)
+               INIT_LIST_HEAD(&ns->policy_list[idx]);
+       ns->profile_version = 20100903;
+       tomoyo_namespace_enabled = !list_empty(&tomoyo_namespace_list);
+       list_add_tail_rcu(&ns->namespace_list, &tomoyo_namespace_list);
+}
+
+/**
+ * tomoyo_print_namespace - Print namespace header.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns nothing.
+ */
+static void tomoyo_print_namespace(struct tomoyo_io_buffer *head)
+{
+       if (!tomoyo_namespace_enabled)
+               return;
+       tomoyo_set_string(head,
+                         container_of(head->r.ns,
+                                      struct tomoyo_policy_namespace,
+                                      namespace_list)->name);
+       tomoyo_set_space(head);
+}
+
+/**
  * tomoyo_print_name_union - Print a tomoyo_name_union.
  *
  * @head: Pointer to "struct tomoyo_io_buffer".
@@ -192,7 +346,7 @@ static void tomoyo_print_name_union(struct tomoyo_io_buffer *head,
                                    const struct tomoyo_name_union *ptr)
 {
        tomoyo_set_space(head);
-       if (ptr->is_group) {
+       if (ptr->group) {
                tomoyo_set_string(head, "@");
                tomoyo_set_string(head, ptr->group->group_name->name);
        } else {
@@ -201,24 +355,46 @@ static void tomoyo_print_name_union(struct tomoyo_io_buffer *head,
 }
 
 /**
- * tomoyo_print_number_union - Print a tomoyo_number_union.
+ * tomoyo_print_name_union_quoted - Print a tomoyo_name_union with a quote.
  *
- * @head:       Pointer to "struct tomoyo_io_buffer".
- * @ptr:        Pointer to "struct tomoyo_number_union".
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ * @ptr:  Pointer to "struct tomoyo_name_union".
+ *
+ * Returns nothing.
  */
-static void tomoyo_print_number_union(struct tomoyo_io_buffer *head,
-                                     const struct tomoyo_number_union *ptr)
+static void tomoyo_print_name_union_quoted(struct tomoyo_io_buffer *head,
+                                          const struct tomoyo_name_union *ptr)
 {
-       tomoyo_set_space(head);
-       if (ptr->is_group) {
+       if (ptr->group) {
+               tomoyo_set_string(head, "@");
+               tomoyo_set_string(head, ptr->group->group_name->name);
+       } else {
+               tomoyo_set_string(head, "\"");
+               tomoyo_set_string(head, ptr->filename->name);
+               tomoyo_set_string(head, "\"");
+       }
+}
+
+/**
+ * tomoyo_print_number_union_nospace - Print a tomoyo_number_union without a space.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ * @ptr:  Pointer to "struct tomoyo_number_union".
+ *
+ * Returns nothing.
+ */
+static void tomoyo_print_number_union_nospace
+(struct tomoyo_io_buffer *head, const struct tomoyo_number_union *ptr)
+{
+       if (ptr->group) {
                tomoyo_set_string(head, "@");
                tomoyo_set_string(head, ptr->group->group_name->name);
        } else {
                int i;
                unsigned long min = ptr->values[0];
                const unsigned long max = ptr->values[1];
-               u8 min_type = ptr->min_type;
-               const u8 max_type = ptr->max_type;
+               u8 min_type = ptr->value_type[0];
+               const u8 max_type = ptr->value_type[1];
                char buffer[128];
                buffer[0] = '\0';
                for (i = 0; i < 2; i++) {
@@ -232,8 +408,8 @@ static void tomoyo_print_number_union(struct tomoyo_io_buffer *head,
                                                 "0%lo", min);
                                break;
                        default:
-                               tomoyo_addprintf(buffer, sizeof(buffer),
-                                                "%lu", min);
+                               tomoyo_addprintf(buffer, sizeof(buffer), "%lu",
+                                                min);
                                break;
                        }
                        if (min == max && min_type == max_type)
@@ -247,35 +423,53 @@ static void tomoyo_print_number_union(struct tomoyo_io_buffer *head,
 }
 
 /**
+ * tomoyo_print_number_union - Print a tomoyo_number_union.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ * @ptr:  Pointer to "struct tomoyo_number_union".
+ *
+ * Returns nothing.
+ */
+static void tomoyo_print_number_union(struct tomoyo_io_buffer *head,
+                                     const struct tomoyo_number_union *ptr)
+{
+       tomoyo_set_space(head);
+       tomoyo_print_number_union_nospace(head, ptr);
+}
+
+/**
  * tomoyo_assign_profile - Create a new profile.
  *
+ * @ns:      Pointer to "struct tomoyo_policy_namespace".
  * @profile: Profile number to create.
  *
  * Returns pointer to "struct tomoyo_profile" on success, NULL otherwise.
  */
-static struct tomoyo_profile *tomoyo_assign_profile(const unsigned int profile)
+static struct tomoyo_profile *tomoyo_assign_profile
+(struct tomoyo_policy_namespace *ns, const unsigned int profile)
 {
        struct tomoyo_profile *ptr;
        struct tomoyo_profile *entry;
        if (profile >= TOMOYO_MAX_PROFILES)
                return NULL;
-       ptr = tomoyo_profile_ptr[profile];
+       ptr = ns->profile_ptr[profile];
        if (ptr)
                return ptr;
        entry = kzalloc(sizeof(*entry), GFP_NOFS);
        if (mutex_lock_interruptible(&tomoyo_policy_lock))
                goto out;
-       ptr = tomoyo_profile_ptr[profile];
+       ptr = ns->profile_ptr[profile];
        if (!ptr && tomoyo_memory_ok(entry)) {
                ptr = entry;
-               ptr->learning = &tomoyo_default_profile.preference;
-               ptr->permissive = &tomoyo_default_profile.preference;
-               ptr->enforcing = &tomoyo_default_profile.preference;
-               ptr->default_config = TOMOYO_CONFIG_DISABLED;
+               ptr->default_config = TOMOYO_CONFIG_DISABLED |
+                       TOMOYO_CONFIG_WANT_GRANT_LOG |
+                       TOMOYO_CONFIG_WANT_REJECT_LOG;
                memset(ptr->config, TOMOYO_CONFIG_USE_DEFAULT,
                       sizeof(ptr->config));
+               ptr->pref[TOMOYO_PREF_MAX_AUDIT_LOG] = 1024;
+               ptr->pref[TOMOYO_PREF_MAX_LEARNING_ENTRY] = 2048;
                mb(); /* Avoid out-of-order execution. */
-               tomoyo_profile_ptr[profile] = ptr;
+               ns->profile_ptr[profile] = ptr;
                entry = NULL;
        }
        mutex_unlock(&tomoyo_policy_lock);
@@ -287,19 +481,29 @@ static struct tomoyo_profile *tomoyo_assign_profile(const unsigned int profile)
 /**
  * tomoyo_profile - Find a profile.
  *
+ * @ns:      Pointer to "struct tomoyo_policy_namespace".
  * @profile: Profile number to find.
  *
  * Returns pointer to "struct tomoyo_profile".
  */
-struct tomoyo_profile *tomoyo_profile(const u8 profile)
+struct tomoyo_profile *tomoyo_profile(const struct tomoyo_policy_namespace *ns,
+                                     const u8 profile)
 {
-       struct tomoyo_profile *ptr = tomoyo_profile_ptr[profile];
-       if (!tomoyo_policy_loaded)
-               return &tomoyo_default_profile;
-       BUG_ON(!ptr);
+       static struct tomoyo_profile tomoyo_null_profile;
+       struct tomoyo_profile *ptr = ns->profile_ptr[profile];
+       if (!ptr)
+               ptr = &tomoyo_null_profile;
        return ptr;
 }
 
+/**
+ * tomoyo_find_yesno - Find values for specified keyword.
+ *
+ * @string: String to check.
+ * @find:   Name of keyword.
+ *
+ * Returns 1 if "@find=yes" was found, 0 if "@find=no" was found, -1 otherwise.
+ */
 static s8 tomoyo_find_yesno(const char *string, const char *find)
 {
        const char *cp = strstr(string, find);
@@ -313,18 +517,15 @@ static s8 tomoyo_find_yesno(const char *string, const char *find)
        return -1;
 }
 
-static void tomoyo_set_bool(bool *b, const char *string, const char *find)
-{
-       switch (tomoyo_find_yesno(string, find)) {
-       case 1:
-               *b = true;
-               break;
-       case 0:
-               *b = false;
-               break;
-       }
-}
-
+/**
+ * tomoyo_set_uint - Set value for specified preference.
+ *
+ * @i:      Pointer to "unsigned int".
+ * @string: String to check.
+ * @find:   Name of keyword.
+ *
+ * Returns nothing.
+ */
 static void tomoyo_set_uint(unsigned int *i, const char *string,
                            const char *find)
 {
@@ -333,51 +534,16 @@ static void tomoyo_set_uint(unsigned int *i, const char *string,
                sscanf(cp + strlen(find), "=%u", i);
 }
 
-static void tomoyo_set_pref(const char *name, const char *value,
-                           const bool use_default,
-                           struct tomoyo_profile *profile)
-{
-       struct tomoyo_preference **pref;
-       bool *verbose;
-       if (!strcmp(name, "enforcing")) {
-               if (use_default) {
-                       pref = &profile->enforcing;
-                       goto set_default;
-               }
-               profile->enforcing = &profile->preference;
-               verbose = &profile->preference.enforcing_verbose;
-               goto set_verbose;
-       }
-       if (!strcmp(name, "permissive")) {
-               if (use_default) {
-                       pref = &profile->permissive;
-                       goto set_default;
-               }
-               profile->permissive = &profile->preference;
-               verbose = &profile->preference.permissive_verbose;
-               goto set_verbose;
-       }
-       if (!strcmp(name, "learning")) {
-               if (use_default) {
-                       pref = &profile->learning;
-                       goto set_default;
-               }
-               profile->learning = &profile->preference;
-               tomoyo_set_uint(&profile->preference.learning_max_entry, value,
-                            "max_entry");
-               verbose = &profile->preference.learning_verbose;
-               goto set_verbose;
-       }
-       return;
- set_default:
-       *pref = &tomoyo_default_profile.preference;
-       return;
- set_verbose:
-       tomoyo_set_bool(verbose, value, "verbose");
-}
-
+/**
+ * tomoyo_set_mode - Set mode for specified profile.
+ *
+ * @name:    Name of functionality.
+ * @value:   Mode for @name.
+ * @profile: Pointer to "struct tomoyo_profile".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_set_mode(char *name, const char *value,
-                          const bool use_default,
                           struct tomoyo_profile *profile)
 {
        u8 i;
@@ -389,7 +555,17 @@ static int tomoyo_set_mode(char *name, const char *value,
                config = 0;
                for (i = 0; i < TOMOYO_MAX_MAC_INDEX
                             + TOMOYO_MAX_MAC_CATEGORY_INDEX; i++) {
-                       if (strcmp(name, tomoyo_mac_keywords[i]))
+                       int len = 0;
+                       if (i < TOMOYO_MAX_MAC_INDEX) {
+                               const u8 c = tomoyo_index2category[i];
+                               const char *category =
+                                       tomoyo_category_keywords[c];
+                               len = strlen(category);
+                               if (strncmp(name, category, len) ||
+                                   name[len++] != ':' || name[len++] != ':')
+                                       continue;
+                       }
+                       if (strcmp(name + len, tomoyo_mac_keywords[i]))
                                continue;
                        config = profile->config[i];
                        break;
@@ -399,7 +575,7 @@ static int tomoyo_set_mode(char *name, const char *value,
        } else {
                return -EINVAL;
        }
-       if (use_default) {
+       if (strstr(value, "use_default")) {
                config = TOMOYO_CONFIG_USE_DEFAULT;
        } else {
                u8 mode;
@@ -410,6 +586,24 @@ static int tomoyo_set_mode(char *name, const char *value,
                                 * 'config' from 'TOMOYO_CONFIG_USE_DEAFULT'.
                                 */
                                config = (config & ~7) | mode;
+               if (config != TOMOYO_CONFIG_USE_DEFAULT) {
+                       switch (tomoyo_find_yesno(value, "grant_log")) {
+                       case 1:
+                               config |= TOMOYO_CONFIG_WANT_GRANT_LOG;
+                               break;
+                       case 0:
+                               config &= ~TOMOYO_CONFIG_WANT_GRANT_LOG;
+                               break;
+                       }
+                       switch (tomoyo_find_yesno(value, "reject_log")) {
+                       case 1:
+                               config |= TOMOYO_CONFIG_WANT_REJECT_LOG;
+                               break;
+                       case 0:
+                               config &= ~TOMOYO_CONFIG_WANT_REJECT_LOG;
+                               break;
+                       }
+               }
        }
        if (i < TOMOYO_MAX_MAC_INDEX + TOMOYO_MAX_MAC_CATEGORY_INDEX)
                profile->config[i] = config;
@@ -429,34 +623,22 @@ static int tomoyo_write_profile(struct tomoyo_io_buffer *head)
 {
        char *data = head->write_buf;
        unsigned int i;
-       bool use_default = false;
        char *cp;
        struct tomoyo_profile *profile;
-       if (sscanf(data, "PROFILE_VERSION=%u", &tomoyo_profile_version) == 1)
+       if (sscanf(data, "PROFILE_VERSION=%u", &head->w.ns->profile_version)
+           == 1)
                return 0;
        i = simple_strtoul(data, &cp, 10);
-       if (data == cp) {
-               profile = &tomoyo_default_profile;
-       } else {
-               if (*cp != '-')
-                       return -EINVAL;
-               data = cp + 1;
-               profile = tomoyo_assign_profile(i);
-               if (!profile)
-                       return -EINVAL;
-       }
+       if (*cp != '-')
+               return -EINVAL;
+       data = cp + 1;
+       profile = tomoyo_assign_profile(head->w.ns, i);
+       if (!profile)
+               return -EINVAL;
        cp = strchr(data, '=');
        if (!cp)
                return -EINVAL;
        *cp++ = '\0';
-       if (profile != &tomoyo_default_profile)
-               use_default = strstr(cp, "use_default") != NULL;
-       if (tomoyo_str_starts(&data, "PREFERENCE::")) {
-               tomoyo_set_pref(data, cp, use_default, profile);
-               return 0;
-       }
-       if (profile == &tomoyo_default_profile)
-               return -EINVAL;
        if (!strcmp(data, "COMMENT")) {
                static DEFINE_SPINLOCK(lock);
                const struct tomoyo_path_info *new_comment
@@ -471,77 +653,62 @@ static int tomoyo_write_profile(struct tomoyo_io_buffer *head)
                tomoyo_put_name(old_comment);
                return 0;
        }
-       return tomoyo_set_mode(data, cp, use_default, profile);
-}
-
-static void tomoyo_print_preference(struct tomoyo_io_buffer *head,
-                                   const int idx)
-{
-       struct tomoyo_preference *pref = &tomoyo_default_profile.preference;
-       const struct tomoyo_profile *profile = idx >= 0 ?
-               tomoyo_profile_ptr[idx] : NULL;
-       char buffer[16] = "";
-       if (profile) {
-               buffer[sizeof(buffer) - 1] = '\0';
-               snprintf(buffer, sizeof(buffer) - 1, "%u-", idx);
-       }
-       if (profile) {
-               pref = profile->learning;
-               if (pref == &tomoyo_default_profile.preference)
-                       goto skip1;
-       }
-       tomoyo_io_printf(head, "%sPREFERENCE::%s={ "
-                        "verbose=%s max_entry=%u }\n",
-                        buffer, "learning",
-                        tomoyo_yesno(pref->learning_verbose),
-                        pref->learning_max_entry);
- skip1:
-       if (profile) {
-               pref = profile->permissive;
-               if (pref == &tomoyo_default_profile.preference)
-                       goto skip2;
-       }
-       tomoyo_io_printf(head, "%sPREFERENCE::%s={ verbose=%s }\n",
-                        buffer, "permissive",
-                        tomoyo_yesno(pref->permissive_verbose));
- skip2:
-       if (profile) {
-               pref = profile->enforcing;
-               if (pref == &tomoyo_default_profile.preference)
-                       return;
+       if (!strcmp(data, "PREFERENCE")) {
+               for (i = 0; i < TOMOYO_MAX_PREF; i++)
+                       tomoyo_set_uint(&profile->pref[i], cp,
+                                       tomoyo_pref_keywords[i]);
+               return 0;
        }
-       tomoyo_io_printf(head, "%sPREFERENCE::%s={ verbose=%s }\n",
-                        buffer, "enforcing",
-                        tomoyo_yesno(pref->enforcing_verbose));
+       return tomoyo_set_mode(data, cp, profile);
 }
 
+/**
+ * tomoyo_print_config - Print mode for specified functionality.
+ *
+ * @head:   Pointer to "struct tomoyo_io_buffer".
+ * @config: Mode for that functionality.
+ *
+ * Returns nothing.
+ *
+ * Caller prints functionality's name.
+ */
 static void tomoyo_print_config(struct tomoyo_io_buffer *head, const u8 config)
 {
-       tomoyo_io_printf(head, "={ mode=%s }\n", tomoyo_mode[config & 3]);
+       tomoyo_io_printf(head, "={ mode=%s grant_log=%s reject_log=%s }\n",
+                        tomoyo_mode[config & 3],
+                        tomoyo_yesno(config & TOMOYO_CONFIG_WANT_GRANT_LOG),
+                        tomoyo_yesno(config & TOMOYO_CONFIG_WANT_REJECT_LOG));
 }
 
 /**
  * tomoyo_read_profile - Read profile table.
  *
  * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns nothing.
  */
 static void tomoyo_read_profile(struct tomoyo_io_buffer *head)
 {
        u8 index;
+       struct tomoyo_policy_namespace *ns =
+               container_of(head->r.ns, typeof(*ns), namespace_list);
        const struct tomoyo_profile *profile;
+       if (head->r.eof)
+               return;
  next:
        index = head->r.index;
-       profile = tomoyo_profile_ptr[index];
+       profile = ns->profile_ptr[index];
        switch (head->r.step) {
        case 0:
-               tomoyo_io_printf(head, "PROFILE_VERSION=%s\n", "20090903");
-               tomoyo_print_preference(head, -1);
+               tomoyo_print_namespace(head);
+               tomoyo_io_printf(head, "PROFILE_VERSION=%u\n",
+                                ns->profile_version);
                head->r.step++;
                break;
        case 1:
                for ( ; head->r.index < TOMOYO_MAX_PROFILES;
                      head->r.index++)
-                       if (tomoyo_profile_ptr[head->r.index])
+                       if (ns->profile_ptr[head->r.index])
                                break;
                if (head->r.index == TOMOYO_MAX_PROFILES)
                        return;
@@ -549,16 +716,25 @@ static void tomoyo_read_profile(struct tomoyo_io_buffer *head)
                break;
        case 2:
                {
+                       u8 i;
                        const struct tomoyo_path_info *comment =
                                profile->comment;
+                       tomoyo_print_namespace(head);
                        tomoyo_io_printf(head, "%u-COMMENT=", index);
                        tomoyo_set_string(head, comment ? comment->name : "");
                        tomoyo_set_lf(head);
+                       tomoyo_io_printf(head, "%u-PREFERENCE={ ", index);
+                       for (i = 0; i < TOMOYO_MAX_PREF; i++)
+                               tomoyo_io_printf(head, "%s=%u ",
+                                                tomoyo_pref_keywords[i],
+                                                profile->pref[i]);
+                       tomoyo_set_string(head, "}\n");
                        head->r.step++;
                }
                break;
        case 3:
                {
+                       tomoyo_print_namespace(head);
                        tomoyo_io_printf(head, "%u-%s", index, "CONFIG");
                        tomoyo_print_config(head, profile->default_config);
                        head->r.bit = 0;
@@ -572,15 +748,22 @@ static void tomoyo_read_profile(struct tomoyo_io_buffer *head)
                        const u8 config = profile->config[i];
                        if (config == TOMOYO_CONFIG_USE_DEFAULT)
                                continue;
-                       tomoyo_io_printf(head, "%u-%s%s", index, "CONFIG::",
-                                        tomoyo_mac_keywords[i]);
+                       tomoyo_print_namespace(head);
+                       if (i < TOMOYO_MAX_MAC_INDEX)
+                               tomoyo_io_printf(head, "%u-CONFIG::%s::%s",
+                                                index,
+                                                tomoyo_category_keywords
+                                                [tomoyo_index2category[i]],
+                                                tomoyo_mac_keywords[i]);
+                       else
+                               tomoyo_io_printf(head, "%u-CONFIG::%s", index,
+                                                tomoyo_mac_keywords[i]);
                        tomoyo_print_config(head, config);
                        head->r.bit++;
                        break;
                }
                if (head->r.bit == TOMOYO_MAX_MAC_INDEX
                    + TOMOYO_MAX_MAC_CATEGORY_INDEX) {
-                       tomoyo_print_preference(head, index);
                        head->r.index++;
                        head->r.step = 1;
                }
@@ -590,6 +773,14 @@ static void tomoyo_read_profile(struct tomoyo_io_buffer *head)
                goto next;
 }
 
+/**
+ * tomoyo_same_manager - Check for duplicated "struct tomoyo_manager" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_head".
+ * @b: Pointer to "struct tomoyo_acl_head".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
 static bool tomoyo_same_manager(const struct tomoyo_acl_head *a,
                                const struct tomoyo_acl_head *b)
 {
@@ -611,8 +802,13 @@ static int tomoyo_update_manager_entry(const char *manager,
                                       const bool is_delete)
 {
        struct tomoyo_manager e = { };
-       int error;
-
+       struct tomoyo_acl_param param = {
+               /* .ns = &tomoyo_kernel_namespace, */
+               .is_delete = is_delete,
+               .list = &tomoyo_kernel_namespace.
+               policy_list[TOMOYO_ID_MANAGER],
+       };
+       int error = is_delete ? -ENOENT : -ENOMEM;
        if (tomoyo_domain_def(manager)) {
                if (!tomoyo_correct_domain(manager))
                        return -EINVAL;
@@ -622,12 +818,11 @@ static int tomoyo_update_manager_entry(const char *manager,
                        return -EINVAL;
        }
        e.manager = tomoyo_get_name(manager);
-       if (!e.manager)
-               return -ENOMEM;
-       error = tomoyo_update_policy(&e.head, sizeof(e), is_delete,
-                                    &tomoyo_policy_list[TOMOYO_ID_MANAGER],
-                                    tomoyo_same_manager);
-       tomoyo_put_name(e.manager);
+       if (e.manager) {
+               error = tomoyo_update_policy(&e.head, sizeof(e), &param,
+                                            tomoyo_same_manager);
+               tomoyo_put_name(e.manager);
+       }
        return error;
 }
 
@@ -643,13 +838,12 @@ static int tomoyo_update_manager_entry(const char *manager,
 static int tomoyo_write_manager(struct tomoyo_io_buffer *head)
 {
        char *data = head->write_buf;
-       bool is_delete = tomoyo_str_starts(&data, TOMOYO_KEYWORD_DELETE);
 
        if (!strcmp(data, "manage_by_non_root")) {
-               tomoyo_manage_by_non_root = !is_delete;
+               tomoyo_manage_by_non_root = !head->w.is_delete;
                return 0;
        }
-       return tomoyo_update_manager_entry(data, is_delete);
+       return tomoyo_update_manager_entry(data, head->w.is_delete);
 }
 
 /**
@@ -663,8 +857,8 @@ static void tomoyo_read_manager(struct tomoyo_io_buffer *head)
 {
        if (head->r.eof)
                return;
-       list_for_each_cookie(head->r.acl,
-                            &tomoyo_policy_list[TOMOYO_ID_MANAGER]) {
+       list_for_each_cookie(head->r.acl, &tomoyo_kernel_namespace.
+                            policy_list[TOMOYO_ID_MANAGER]) {
                struct tomoyo_manager *ptr =
                        list_entry(head->r.acl, typeof(*ptr), head.list);
                if (ptr->head.is_deleted)
@@ -697,8 +891,8 @@ static bool tomoyo_manager(void)
                return true;
        if (!tomoyo_manage_by_non_root && (task->cred->uid || task->cred->euid))
                return false;
-       list_for_each_entry_rcu(ptr, &tomoyo_policy_list[TOMOYO_ID_MANAGER],
-                               head.list) {
+       list_for_each_entry_rcu(ptr, &tomoyo_kernel_namespace.
+                               policy_list[TOMOYO_ID_MANAGER], head.list) {
                if (!ptr->head.is_deleted && ptr->is_domain
                    && !tomoyo_pathcmp(domainname, ptr->manager)) {
                        found = true;
@@ -710,8 +904,8 @@ static bool tomoyo_manager(void)
        exe = tomoyo_get_exe();
        if (!exe)
                return false;
-       list_for_each_entry_rcu(ptr, &tomoyo_policy_list[TOMOYO_ID_MANAGER],
-                               head.list) {
+       list_for_each_entry_rcu(ptr, &tomoyo_kernel_namespace.
+                               policy_list[TOMOYO_ID_MANAGER], head.list) {
                if (!ptr->head.is_deleted && !ptr->is_domain
                    && !strcmp(exe, ptr->manager->name)) {
                        found = true;
@@ -732,7 +926,7 @@ static bool tomoyo_manager(void)
 }
 
 /**
- * tomoyo_select_one - Parse select command.
+ * tomoyo_select_domain - Parse select command.
  *
  * @head: Pointer to "struct tomoyo_io_buffer".
  * @data: String to parse.
@@ -741,16 +935,15 @@ static bool tomoyo_manager(void)
  *
  * Caller holds tomoyo_read_lock().
  */
-static bool tomoyo_select_one(struct tomoyo_io_buffer *head, const char *data)
+static bool tomoyo_select_domain(struct tomoyo_io_buffer *head,
+                                const char *data)
 {
        unsigned int pid;
        struct tomoyo_domain_info *domain = NULL;
        bool global_pid = false;
-
-       if (!strcmp(data, "allow_execute")) {
-               head->r.print_execute_only = true;
-               return true;
-       }
+       if (strncmp(data, "select ", 7))
+               return false;
+       data += 7;
        if (sscanf(data, "pid=%u", &pid) == 1 ||
            (global_pid = true, sscanf(data, "global-pid=%u", &pid) == 1)) {
                struct task_struct *p;
@@ -769,7 +962,7 @@ static bool tomoyo_select_one(struct tomoyo_io_buffer *head, const char *data)
                        domain = tomoyo_find_domain(data + 7);
        } else
                return false;
-       head->write_var1 = domain;
+       head->w.domain = domain;
        /* Accessing read_buf is safe because head->io_sem is held. */
        if (!head->read_buf)
                return true; /* Do nothing if open(O_WRONLY). */
@@ -821,20 +1014,47 @@ static int tomoyo_delete_domain(char *domainname)
 /**
  * tomoyo_write_domain2 - Write domain policy.
  *
- * @head: Pointer to "struct tomoyo_io_buffer".
+ * @ns:        Pointer to "struct tomoyo_policy_namespace".
+ * @list:      Pointer to "struct list_head".
+ * @data:      Policy to be interpreted.
+ * @is_delete: True if it is a delete request.
  *
  * Returns 0 on success, negative value otherwise.
  *
  * Caller holds tomoyo_read_lock().
  */
-static int tomoyo_write_domain2(char *data, struct tomoyo_domain_info *domain,
+static int tomoyo_write_domain2(struct tomoyo_policy_namespace *ns,
+                               struct list_head *list, char *data,
                                const bool is_delete)
 {
-       if (tomoyo_str_starts(&data, TOMOYO_KEYWORD_ALLOW_MOUNT))
-               return tomoyo_write_mount(data, domain, is_delete);
-       return tomoyo_write_file(data, domain, is_delete);
+       struct tomoyo_acl_param param = {
+               .ns = ns,
+               .list = list,
+               .data = data,
+               .is_delete = is_delete,
+       };
+       static const struct {
+               const char *keyword;
+               int (*write) (struct tomoyo_acl_param *);
+       } tomoyo_callback[1] = {
+               { "file ", tomoyo_write_file },
+       };
+       u8 i;
+       for (i = 0; i < 1; i++) {
+               if (!tomoyo_str_starts(&param.data,
+                                      tomoyo_callback[i].keyword))
+                       continue;
+               return tomoyo_callback[i].write(&param);
+       }
+       return -EINVAL;
 }
 
+/* String table for domain flags. */
+const char * const tomoyo_dif[TOMOYO_MAX_DOMAIN_INFO_FLAGS] = {
+       [TOMOYO_DIF_QUOTA_WARNED]      = "quota_exceeded\n",
+       [TOMOYO_DIF_TRANSITION_FAILED] = "transition_failed\n",
+};
+
 /**
  * tomoyo_write_domain - Write domain policy.
  *
@@ -847,69 +1067,198 @@ static int tomoyo_write_domain2(char *data, struct tomoyo_domain_info *domain,
 static int tomoyo_write_domain(struct tomoyo_io_buffer *head)
 {
        char *data = head->write_buf;
-       struct tomoyo_domain_info *domain = head->write_var1;
-       bool is_delete = false;
-       bool is_select = false;
+       struct tomoyo_policy_namespace *ns;
+       struct tomoyo_domain_info *domain = head->w.domain;
+       const bool is_delete = head->w.is_delete;
+       bool is_select = !is_delete && tomoyo_str_starts(&data, "select ");
        unsigned int profile;
-
-       if (tomoyo_str_starts(&data, TOMOYO_KEYWORD_DELETE))
-               is_delete = true;
-       else if (tomoyo_str_starts(&data, TOMOYO_KEYWORD_SELECT))
-               is_select = true;
-       if (is_select && tomoyo_select_one(head, data))
-               return 0;
-       /* Don't allow updating policies by non manager programs. */
-       if (!tomoyo_manager())
-               return -EPERM;
-       if (tomoyo_domain_def(data)) {
+       if (*data == '<') {
                domain = NULL;
                if (is_delete)
                        tomoyo_delete_domain(data);
                else if (is_select)
                        domain = tomoyo_find_domain(data);
                else
-                       domain = tomoyo_assign_domain(data, 0);
-               head->write_var1 = domain;
+                       domain = tomoyo_assign_domain(data, false);
+               head->w.domain = domain;
                return 0;
        }
        if (!domain)
                return -EINVAL;
-
-       if (sscanf(data, TOMOYO_KEYWORD_USE_PROFILE "%u", &profile) == 1
+       ns = domain->ns;
+       if (sscanf(data, "use_profile %u", &profile) == 1
            && profile < TOMOYO_MAX_PROFILES) {
-               if (tomoyo_profile_ptr[profile] || !tomoyo_policy_loaded)
+               if (!tomoyo_policy_loaded || ns->profile_ptr[profile])
                        domain->profile = (u8) profile;
                return 0;
        }
-       if (!strcmp(data, TOMOYO_KEYWORD_IGNORE_GLOBAL_ALLOW_READ)) {
-               domain->ignore_global_allow_read = !is_delete;
-               return 0;
-       }
-       if (!strcmp(data, TOMOYO_KEYWORD_QUOTA_EXCEEDED)) {
-               domain->quota_warned = !is_delete;
+       if (sscanf(data, "use_group %u\n", &profile) == 1
+           && profile < TOMOYO_MAX_ACL_GROUPS) {
+               if (!is_delete)
+                       domain->group = (u8) profile;
                return 0;
        }
-       if (!strcmp(data, TOMOYO_KEYWORD_TRANSITION_FAILED)) {
-               domain->transition_failed = !is_delete;
+       for (profile = 0; profile < TOMOYO_MAX_DOMAIN_INFO_FLAGS; profile++) {
+               const char *cp = tomoyo_dif[profile];
+               if (strncmp(data, cp, strlen(cp) - 1))
+                       continue;
+               domain->flags[profile] = !is_delete;
                return 0;
        }
-       return tomoyo_write_domain2(data, domain, is_delete);
+       return tomoyo_write_domain2(ns, &domain->acl_info_list, data,
+                                   is_delete);
 }
 
 /**
- * tomoyo_fns - Find next set bit.
+ * tomoyo_print_condition - Print condition part.
  *
- * @perm: 8 bits value.
- * @bit:  First bit to find.
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ * @cond: Pointer to "struct tomoyo_condition".
  *
- * Returns next on-bit on success, 8 otherwise.
+ * Returns true on success, false otherwise.
  */
-static u8 tomoyo_fns(const u8 perm, u8 bit)
+static bool tomoyo_print_condition(struct tomoyo_io_buffer *head,
+                                  const struct tomoyo_condition *cond)
 {
-       for ( ; bit < 8; bit++)
-               if (perm & (1 << bit))
+       switch (head->r.cond_step) {
+       case 0:
+               head->r.cond_index = 0;
+               head->r.cond_step++;
+               /* fall through */
+       case 1:
+               {
+                       const u16 condc = cond->condc;
+                       const struct tomoyo_condition_element *condp =
+                               (typeof(condp)) (cond + 1);
+                       const struct tomoyo_number_union *numbers_p =
+                               (typeof(numbers_p)) (condp + condc);
+                       const struct tomoyo_name_union *names_p =
+                               (typeof(names_p))
+                               (numbers_p + cond->numbers_count);
+                       const struct tomoyo_argv *argv =
+                               (typeof(argv)) (names_p + cond->names_count);
+                       const struct tomoyo_envp *envp =
+                               (typeof(envp)) (argv + cond->argc);
+                       u16 skip;
+                       for (skip = 0; skip < head->r.cond_index; skip++) {
+                               const u8 left = condp->left;
+                               const u8 right = condp->right;
+                               condp++;
+                               switch (left) {
+                               case TOMOYO_ARGV_ENTRY:
+                                       argv++;
+                                       continue;
+                               case TOMOYO_ENVP_ENTRY:
+                                       envp++;
+                                       continue;
+                               case TOMOYO_NUMBER_UNION:
+                                       numbers_p++;
+                                       break;
+                               }
+                               switch (right) {
+                               case TOMOYO_NAME_UNION:
+                                       names_p++;
+                                       break;
+                               case TOMOYO_NUMBER_UNION:
+                                       numbers_p++;
+                                       break;
+                               }
+                       }
+                       while (head->r.cond_index < condc) {
+                               const u8 match = condp->equals;
+                               const u8 left = condp->left;
+                               const u8 right = condp->right;
+                               if (!tomoyo_flush(head))
+                                       return false;
+                               condp++;
+                               head->r.cond_index++;
+                               tomoyo_set_space(head);
+                               switch (left) {
+                               case TOMOYO_ARGV_ENTRY:
+                                       tomoyo_io_printf(head,
+                                                        "exec.argv[%lu]%s=\"",
+                                                        argv->index, argv->
+                                                        is_not ? "!" : "");
+                                       tomoyo_set_string(head,
+                                                         argv->value->name);
+                                       tomoyo_set_string(head, "\"");
+                                       argv++;
+                                       continue;
+                               case TOMOYO_ENVP_ENTRY:
+                                       tomoyo_set_string(head,
+                                                         "exec.envp[\"");
+                                       tomoyo_set_string(head,
+                                                         envp->name->name);
+                                       tomoyo_io_printf(head, "\"]%s=", envp->
+                                                        is_not ? "!" : "");
+                                       if (envp->value) {
+                                               tomoyo_set_string(head, "\"");
+                                               tomoyo_set_string(head, envp->
+                                                                 value->name);
+                                               tomoyo_set_string(head, "\"");
+                                       } else {
+                                               tomoyo_set_string(head,
+                                                                 "NULL");
+                                       }
+                                       envp++;
+                                       continue;
+                               case TOMOYO_NUMBER_UNION:
+                                       tomoyo_print_number_union_nospace
+                                               (head, numbers_p++);
+                                       break;
+                               default:
+                                       tomoyo_set_string(head,
+                                              tomoyo_condition_keyword[left]);
+                                       break;
+                               }
+                               tomoyo_set_string(head, match ? "=" : "!=");
+                               switch (right) {
+                               case TOMOYO_NAME_UNION:
+                                       tomoyo_print_name_union_quoted
+                                               (head, names_p++);
+                                       break;
+                               case TOMOYO_NUMBER_UNION:
+                                       tomoyo_print_number_union_nospace
+                                               (head, numbers_p++);
+                                       break;
+                               default:
+                                       tomoyo_set_string(head,
+                                         tomoyo_condition_keyword[right]);
+                                       break;
+                               }
+                       }
+               }
+               head->r.cond_step++;
+               /* fall through */
+       case 2:
+               if (!tomoyo_flush(head))
                        break;
-       return bit;
+               head->r.cond_step++;
+               /* fall through */
+       case 3:
+               tomoyo_set_lf(head);
+               return true;
+       }
+       return false;
+}
+
+/**
+ * tomoyo_set_group - Print "acl_group " header keyword and category name.
+ *
+ * @head:     Pointer to "struct tomoyo_io_buffer".
+ * @category: Category name.
+ *
+ * Returns nothing.
+ */
+static void tomoyo_set_group(struct tomoyo_io_buffer *head,
+                            const char *category)
+{
+       if (head->type == TOMOYO_EXCEPTIONPOLICY) {
+               tomoyo_print_namespace(head);
+               tomoyo_io_printf(head, "acl_group %u ",
+                                head->r.acl_group_index);
+       }
+       tomoyo_set_string(head, category);
 }
 
 /**
@@ -924,63 +1273,96 @@ static bool tomoyo_print_entry(struct tomoyo_io_buffer *head,
                               struct tomoyo_acl_info *acl)
 {
        const u8 acl_type = acl->type;
+       bool first = true;
        u8 bit;
 
+       if (head->r.print_cond_part)
+               goto print_cond_part;
        if (acl->is_deleted)
                return true;
- next:
-       bit = head->r.bit;
        if (!tomoyo_flush(head))
                return false;
        else if (acl_type == TOMOYO_TYPE_PATH_ACL) {
                struct tomoyo_path_acl *ptr =
                        container_of(acl, typeof(*ptr), head);
                const u16 perm = ptr->perm;
-               for ( ; bit < TOMOYO_MAX_PATH_OPERATION; bit++) {
+               for (bit = 0; bit < TOMOYO_MAX_PATH_OPERATION; bit++) {
                        if (!(perm & (1 << bit)))
                                continue;
-                       if (head->r.print_execute_only &&
+                       if (head->r.print_transition_related_only &&
                            bit != TOMOYO_TYPE_EXECUTE)
                                continue;
-                       /* Print "read/write" instead of "read" and "write". */
-                       if ((bit == TOMOYO_TYPE_READ ||
-                            bit == TOMOYO_TYPE_WRITE)
-                           && (perm & (1 << TOMOYO_TYPE_READ_WRITE)))
-                               continue;
-                       break;
+                       if (first) {
+                               tomoyo_set_group(head, "file ");
+                               first = false;
+                       } else {
+                               tomoyo_set_slash(head);
+                       }
+                       tomoyo_set_string(head, tomoyo_path_keyword[bit]);
                }
-               if (bit >= TOMOYO_MAX_PATH_OPERATION)
-                       goto done;
-               tomoyo_io_printf(head, "allow_%s", tomoyo_path_keyword[bit]);
+               if (first)
+                       return true;
                tomoyo_print_name_union(head, &ptr->name);
-       } else if (head->r.print_execute_only) {
+       } else if (head->r.print_transition_related_only) {
                return true;
        } else if (acl_type == TOMOYO_TYPE_PATH2_ACL) {
                struct tomoyo_path2_acl *ptr =
                        container_of(acl, typeof(*ptr), head);
-               bit = tomoyo_fns(ptr->perm, bit);
-               if (bit >= TOMOYO_MAX_PATH2_OPERATION)
-                       goto done;
-               tomoyo_io_printf(head, "allow_%s", tomoyo_path2_keyword[bit]);
+               const u8 perm = ptr->perm;
+               for (bit = 0; bit < TOMOYO_MAX_PATH2_OPERATION; bit++) {
+                       if (!(perm & (1 << bit)))
+                               continue;
+                       if (first) {
+                               tomoyo_set_group(head, "file ");
+                               first = false;
+                       } else {
+                               tomoyo_set_slash(head);
+                       }
+                       tomoyo_set_string(head, tomoyo_mac_keywords
+                                         [tomoyo_pp2mac[bit]]);
+               }
+               if (first)
+                       return true;
                tomoyo_print_name_union(head, &ptr->name1);
                tomoyo_print_name_union(head, &ptr->name2);
        } else if (acl_type == TOMOYO_TYPE_PATH_NUMBER_ACL) {
                struct tomoyo_path_number_acl *ptr =
                        container_of(acl, typeof(*ptr), head);
-               bit = tomoyo_fns(ptr->perm, bit);
-               if (bit >= TOMOYO_MAX_PATH_NUMBER_OPERATION)
-                       goto done;
-               tomoyo_io_printf(head, "allow_%s",
-                                tomoyo_path_number_keyword[bit]);
+               const u8 perm = ptr->perm;
+               for (bit = 0; bit < TOMOYO_MAX_PATH_NUMBER_OPERATION; bit++) {
+                       if (!(perm & (1 << bit)))
+                               continue;
+                       if (first) {
+                               tomoyo_set_group(head, "file ");
+                               first = false;
+                       } else {
+                               tomoyo_set_slash(head);
+                       }
+                       tomoyo_set_string(head, tomoyo_mac_keywords
+                                         [tomoyo_pn2mac[bit]]);
+               }
+               if (first)
+                       return true;
                tomoyo_print_name_union(head, &ptr->name);
                tomoyo_print_number_union(head, &ptr->number);
        } else if (acl_type == TOMOYO_TYPE_MKDEV_ACL) {
                struct tomoyo_mkdev_acl *ptr =
                        container_of(acl, typeof(*ptr), head);
-               bit = tomoyo_fns(ptr->perm, bit);
-               if (bit >= TOMOYO_MAX_MKDEV_OPERATION)
-                       goto done;
-               tomoyo_io_printf(head, "allow_%s", tomoyo_mkdev_keyword[bit]);
+               const u8 perm = ptr->perm;
+               for (bit = 0; bit < TOMOYO_MAX_MKDEV_OPERATION; bit++) {
+                       if (!(perm & (1 << bit)))
+                               continue;
+                       if (first) {
+                               tomoyo_set_group(head, "file ");
+                               first = false;
+                       } else {
+                               tomoyo_set_slash(head);
+                       }
+                       tomoyo_set_string(head, tomoyo_mac_keywords
+                                         [tomoyo_pnnn2mac[bit]]);
+               }
+               if (first)
+                       return true;
                tomoyo_print_name_union(head, &ptr->name);
                tomoyo_print_number_union(head, &ptr->mode);
                tomoyo_print_number_union(head, &ptr->major);
@@ -988,35 +1370,41 @@ static bool tomoyo_print_entry(struct tomoyo_io_buffer *head,
        } else if (acl_type == TOMOYO_TYPE_MOUNT_ACL) {
                struct tomoyo_mount_acl *ptr =
                        container_of(acl, typeof(*ptr), head);
-               tomoyo_io_printf(head, "allow_mount");
+               tomoyo_set_group(head, "file mount");
                tomoyo_print_name_union(head, &ptr->dev_name);
                tomoyo_print_name_union(head, &ptr->dir_name);
                tomoyo_print_name_union(head, &ptr->fs_type);
                tomoyo_print_number_union(head, &ptr->flags);
        }
-       head->r.bit = bit + 1;
-       tomoyo_io_printf(head, "\n");
-       if (acl_type != TOMOYO_TYPE_MOUNT_ACL)
-               goto next;
- done:
-       head->r.bit = 0;
+       if (acl->cond) {
+               head->r.print_cond_part = true;
+               head->r.cond_step = 0;
+               if (!tomoyo_flush(head))
+                       return false;
+print_cond_part:
+               if (!tomoyo_print_condition(head, acl->cond))
+                       return false;
+               head->r.print_cond_part = false;
+       } else {
+               tomoyo_set_lf(head);
+       }
        return true;
 }
 
 /**
  * tomoyo_read_domain2 - Read domain policy.
  *
- * @head:   Pointer to "struct tomoyo_io_buffer".
- * @domain: Pointer to "struct tomoyo_domain_info".
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ * @list: Pointer to "struct list_head".
  *
  * Caller holds tomoyo_read_lock().
  *
  * Returns true on success, false otherwise.
  */
 static bool tomoyo_read_domain2(struct tomoyo_io_buffer *head,
-                               struct tomoyo_domain_info *domain)
+                               struct list_head *list)
 {
-       list_for_each_cookie(head->r.acl, &domain->acl_info_list) {
+       list_for_each_cookie(head->r.acl, list) {
                struct tomoyo_acl_info *ptr =
                        list_entry(head->r.acl, typeof(*ptr), list);
                if (!tomoyo_print_entry(head, ptr))
@@ -1041,6 +1429,7 @@ static void tomoyo_read_domain(struct tomoyo_io_buffer *head)
                struct tomoyo_domain_info *domain =
                        list_entry(head->r.domain, typeof(*domain), list);
                switch (head->r.step) {
+                       u8 i;
                case 0:
                        if (domain->is_deleted &&
                            !head->r.print_this_domain_only)
@@ -1048,22 +1437,18 @@ static void tomoyo_read_domain(struct tomoyo_io_buffer *head)
                        /* Print domainname and flags. */
                        tomoyo_set_string(head, domain->domainname->name);
                        tomoyo_set_lf(head);
-                       tomoyo_io_printf(head,
-                                        TOMOYO_KEYWORD_USE_PROFILE "%u\n",
+                       tomoyo_io_printf(head, "use_profile %u\n",
                                         domain->profile);
-                       if (domain->quota_warned)
-                               tomoyo_set_string(head, "quota_exceeded\n");
-                       if (domain->transition_failed)
-                               tomoyo_set_string(head, "transition_failed\n");
-                       if (domain->ignore_global_allow_read)
-                               tomoyo_set_string(head,
-                                      TOMOYO_KEYWORD_IGNORE_GLOBAL_ALLOW_READ
-                                                 "\n");
+                       tomoyo_io_printf(head, "use_group %u\n",
+                                        domain->group);
+                       for (i = 0; i < TOMOYO_MAX_DOMAIN_INFO_FLAGS; i++)
+                               if (domain->flags[i])
+                                       tomoyo_set_string(head, tomoyo_dif[i]);
                        head->r.step++;
                        tomoyo_set_lf(head);
                        /* fall through */
                case 1:
-                       if (!tomoyo_read_domain2(head, domain))
+                       if (!tomoyo_read_domain2(head, &domain->acl_info_list))
                                return;
                        head->r.step++;
                        if (!tomoyo_set_lf(head))
@@ -1080,82 +1465,15 @@ static void tomoyo_read_domain(struct tomoyo_io_buffer *head)
 }
 
 /**
- * tomoyo_write_domain_profile - Assign profile for specified domain.
+ * tomoyo_write_pid: Specify PID to obtain domainname.
  *
  * @head: Pointer to "struct tomoyo_io_buffer".
  *
- * Returns 0 on success, -EINVAL otherwise.
- *
- * This is equivalent to doing
- *
- *     ( echo "select " $domainname; echo "use_profile " $profile ) |
- *     /usr/sbin/tomoyo-loadpolicy -d
- *
- * Caller holds tomoyo_read_lock().
+ * Returns 0.
  */
-static int tomoyo_write_domain_profile(struct tomoyo_io_buffer *head)
+static int tomoyo_write_pid(struct tomoyo_io_buffer *head)
 {
-       char *data = head->write_buf;
-       char *cp = strchr(data, ' ');
-       struct tomoyo_domain_info *domain;
-       unsigned long profile;
-
-       if (!cp)
-               return -EINVAL;
-       *cp = '\0';
-       domain = tomoyo_find_domain(cp + 1);
-       if (strict_strtoul(data, 10, &profile))
-               return -EINVAL;
-       if (domain && profile < TOMOYO_MAX_PROFILES
-           && (tomoyo_profile_ptr[profile] || !tomoyo_policy_loaded))
-               domain->profile = (u8) profile;
-       return 0;
-}
-
-/**
- * tomoyo_read_domain_profile - Read only domainname and profile.
- *
- * @head: Pointer to "struct tomoyo_io_buffer".
- *
- * Returns list of profile number and domainname pairs.
- *
- * This is equivalent to doing
- *
- *     grep -A 1 '^<kernel>' /sys/kernel/security/tomoyo/domain_policy |
- *     awk ' { if ( domainname == "" ) { if ( $1 == "<kernel>" )
- *     domainname = $0; } else if ( $1 == "use_profile" ) {
- *     print $2 " " domainname; domainname = ""; } } ; '
- *
- * Caller holds tomoyo_read_lock().
- */
-static void tomoyo_read_domain_profile(struct tomoyo_io_buffer *head)
-{
-       if (head->r.eof)
-               return;
-       list_for_each_cookie(head->r.domain, &tomoyo_domain_list) {
-               struct tomoyo_domain_info *domain =
-                       list_entry(head->r.domain, typeof(*domain), list);
-               if (domain->is_deleted)
-                       continue;
-               if (!tomoyo_flush(head))
-                       return;
-               tomoyo_io_printf(head, "%u ", domain->profile);
-               tomoyo_set_string(head, domain->domainname->name);
-               tomoyo_set_lf(head);
-       }
-       head->r.eof = true;
-}
-
-/**
- * tomoyo_write_pid: Specify PID to obtain domainname.
- *
- * @head: Pointer to "struct tomoyo_io_buffer".
- *
- * Returns 0.
- */
-static int tomoyo_write_pid(struct tomoyo_io_buffer *head)
-{
-       head->r.eof = false;
+       head->r.eof = false;
        return 0;
 }
 
@@ -1204,18 +1522,20 @@ static void tomoyo_read_pid(struct tomoyo_io_buffer *head)
        tomoyo_set_string(head, domain->domainname->name);
 }
 
+/* String table for domain transition control keywords. */
 static const char *tomoyo_transition_type[TOMOYO_MAX_TRANSITION_TYPE] = {
-       [TOMOYO_TRANSITION_CONTROL_NO_INITIALIZE]
-       = TOMOYO_KEYWORD_NO_INITIALIZE_DOMAIN,
-       [TOMOYO_TRANSITION_CONTROL_INITIALIZE]
-       = TOMOYO_KEYWORD_INITIALIZE_DOMAIN,
-       [TOMOYO_TRANSITION_CONTROL_NO_KEEP] = TOMOYO_KEYWORD_NO_KEEP_DOMAIN,
-       [TOMOYO_TRANSITION_CONTROL_KEEP] = TOMOYO_KEYWORD_KEEP_DOMAIN
+       [TOMOYO_TRANSITION_CONTROL_NO_RESET]      = "no_reset_domain ",
+       [TOMOYO_TRANSITION_CONTROL_RESET]         = "reset_domain ",
+       [TOMOYO_TRANSITION_CONTROL_NO_INITIALIZE] = "no_initialize_domain ",
+       [TOMOYO_TRANSITION_CONTROL_INITIALIZE]    = "initialize_domain ",
+       [TOMOYO_TRANSITION_CONTROL_NO_KEEP]       = "no_keep_domain ",
+       [TOMOYO_TRANSITION_CONTROL_KEEP]          = "keep_domain ",
 };
 
+/* String table for grouping keywords. */
 static const char *tomoyo_group_name[TOMOYO_MAX_GROUP] = {
-       [TOMOYO_PATH_GROUP] = TOMOYO_KEYWORD_PATH_GROUP,
-       [TOMOYO_NUMBER_GROUP] = TOMOYO_KEYWORD_NUMBER_GROUP
+       [TOMOYO_PATH_GROUP]   = "path_group ",
+       [TOMOYO_NUMBER_GROUP] = "number_group ",
 };
 
 /**
@@ -1229,29 +1549,30 @@ static const char *tomoyo_group_name[TOMOYO_MAX_GROUP] = {
  */
 static int tomoyo_write_exception(struct tomoyo_io_buffer *head)
 {
-       char *data = head->write_buf;
-       bool is_delete = tomoyo_str_starts(&data, TOMOYO_KEYWORD_DELETE);
-       u8 i;
-       static const struct {
-               const char *keyword;
-               int (*write) (char *, const bool);
-       } tomoyo_callback[4] = {
-               { TOMOYO_KEYWORD_AGGREGATOR, tomoyo_write_aggregator },
-               { TOMOYO_KEYWORD_FILE_PATTERN, tomoyo_write_pattern },
-               { TOMOYO_KEYWORD_DENY_REWRITE, tomoyo_write_no_rewrite },
-               { TOMOYO_KEYWORD_ALLOW_READ, tomoyo_write_globally_readable },
+       const bool is_delete = head->w.is_delete;
+       struct tomoyo_acl_param param = {
+               .ns = head->w.ns,
+               .is_delete = is_delete,
+               .data = head->write_buf,
        };
-
+       u8 i;
+       if (tomoyo_str_starts(&param.data, "aggregator "))
+               return tomoyo_write_aggregator(&param);
        for (i = 0; i < TOMOYO_MAX_TRANSITION_TYPE; i++)
-               if (tomoyo_str_starts(&data, tomoyo_transition_type[i]))
-                       return tomoyo_write_transition_control(data, is_delete,
-                                                              i);
-       for (i = 0; i < 4; i++)
-               if (tomoyo_str_starts(&data, tomoyo_callback[i].keyword))
-                       return tomoyo_callback[i].write(data, is_delete);
+               if (tomoyo_str_starts(&param.data, tomoyo_transition_type[i]))
+                       return tomoyo_write_transition_control(&param, i);
        for (i = 0; i < TOMOYO_MAX_GROUP; i++)
-               if (tomoyo_str_starts(&data, tomoyo_group_name[i]))
-                       return tomoyo_write_group(data, is_delete, i);
+               if (tomoyo_str_starts(&param.data, tomoyo_group_name[i]))
+                       return tomoyo_write_group(&param, i);
+       if (tomoyo_str_starts(&param.data, "acl_group ")) {
+               unsigned int group;
+               char *data;
+               group = simple_strtoul(param.data, &data, 10);
+               if (group < TOMOYO_MAX_ACL_GROUPS && *data++ == ' ')
+                       return tomoyo_write_domain2
+                               (head->w.ns, &head->w.ns->acl_group[group],
+                                data, is_delete);
+       }
        return -EINVAL;
 }
 
@@ -1267,9 +1588,12 @@ static int tomoyo_write_exception(struct tomoyo_io_buffer *head)
  */
 static bool tomoyo_read_group(struct tomoyo_io_buffer *head, const int idx)
 {
-       list_for_each_cookie(head->r.group, &tomoyo_group_list[idx]) {
+       struct tomoyo_policy_namespace *ns =
+               container_of(head->r.ns, typeof(*ns), namespace_list);
+       struct list_head *list = &ns->group_list[idx];
+       list_for_each_cookie(head->r.group, list) {
                struct tomoyo_group *group =
-                       list_entry(head->r.group, typeof(*group), list);
+                       list_entry(head->r.group, typeof(*group), head.list);
                list_for_each_cookie(head->r.acl, &group->member_list) {
                        struct tomoyo_acl_head *ptr =
                                list_entry(head->r.acl, typeof(*ptr), list);
@@ -1277,6 +1601,7 @@ static bool tomoyo_read_group(struct tomoyo_io_buffer *head, const int idx)
                                continue;
                        if (!tomoyo_flush(head))
                                return false;
+                       tomoyo_print_namespace(head);
                        tomoyo_set_string(head, tomoyo_group_name[idx]);
                        tomoyo_set_string(head, group->group_name->name);
                        if (idx == TOMOYO_PATH_GROUP) {
@@ -1310,7 +1635,10 @@ static bool tomoyo_read_group(struct tomoyo_io_buffer *head, const int idx)
  */
 static bool tomoyo_read_policy(struct tomoyo_io_buffer *head, const int idx)
 {
-       list_for_each_cookie(head->r.acl, &tomoyo_policy_list[idx]) {
+       struct tomoyo_policy_namespace *ns =
+               container_of(head->r.ns, typeof(*ns), namespace_list);
+       struct list_head *list = &ns->policy_list[idx];
+       list_for_each_cookie(head->r.acl, list) {
                struct tomoyo_acl_head *acl =
                        container_of(head->r.acl, typeof(*acl), list);
                if (acl->is_deleted)
@@ -1322,35 +1650,23 @@ static bool tomoyo_read_policy(struct tomoyo_io_buffer *head, const int idx)
                        {
                                struct tomoyo_transition_control *ptr =
                                        container_of(acl, typeof(*ptr), head);
-                               tomoyo_set_string(head,
-                                                 tomoyo_transition_type
+                               tomoyo_print_namespace(head);
+                               tomoyo_set_string(head, tomoyo_transition_type
                                                  [ptr->type]);
-                               if (ptr->program)
-                                       tomoyo_set_string(head,
-                                                         ptr->program->name);
-                               if (ptr->program && ptr->domainname)
-                                       tomoyo_set_string(head, " from ");
-                               if (ptr->domainname)
-                                       tomoyo_set_string(head,
-                                                         ptr->domainname->
-                                                         name);
-                       }
-                       break;
-               case TOMOYO_ID_GLOBALLY_READABLE:
-                       {
-                               struct tomoyo_readable_file *ptr =
-                                       container_of(acl, typeof(*ptr), head);
-                               tomoyo_set_string(head,
-                                                 TOMOYO_KEYWORD_ALLOW_READ);
-                               tomoyo_set_string(head, ptr->filename->name);
+                               tomoyo_set_string(head, ptr->program ?
+                                                 ptr->program->name : "any");
+                               tomoyo_set_string(head, " from ");
+                               tomoyo_set_string(head, ptr->domainname ?
+                                                 ptr->domainname->name :
+                                                 "any");
                        }
                        break;
                case TOMOYO_ID_AGGREGATOR:
                        {
                                struct tomoyo_aggregator *ptr =
                                        container_of(acl, typeof(*ptr), head);
-                               tomoyo_set_string(head,
-                                                 TOMOYO_KEYWORD_AGGREGATOR);
+                               tomoyo_print_namespace(head);
+                               tomoyo_set_string(head, "aggregator ");
                                tomoyo_set_string(head,
                                                  ptr->original_name->name);
                                tomoyo_set_space(head);
@@ -1358,24 +1674,6 @@ static bool tomoyo_read_policy(struct tomoyo_io_buffer *head, const int idx)
                                               ptr->aggregated_name->name);
                        }
                        break;
-               case TOMOYO_ID_PATTERN:
-                       {
-                               struct tomoyo_no_pattern *ptr =
-                                       container_of(acl, typeof(*ptr), head);
-                               tomoyo_set_string(head,
-                                                 TOMOYO_KEYWORD_FILE_PATTERN);
-                               tomoyo_set_string(head, ptr->pattern->name);
-                       }
-                       break;
-               case TOMOYO_ID_NO_REWRITE:
-                       {
-                               struct tomoyo_no_rewrite *ptr =
-                                       container_of(acl, typeof(*ptr), head);
-                               tomoyo_set_string(head,
-                                                 TOMOYO_KEYWORD_DENY_REWRITE);
-                               tomoyo_set_string(head, ptr->pattern->name);
-                       }
-                       break;
                default:
                        continue;
                }
@@ -1394,6 +1692,8 @@ static bool tomoyo_read_policy(struct tomoyo_io_buffer *head, const int idx)
  */
 static void tomoyo_read_exception(struct tomoyo_io_buffer *head)
 {
+       struct tomoyo_policy_namespace *ns =
+               container_of(head->r.ns, typeof(*ns), namespace_list);
        if (head->r.eof)
                return;
        while (head->r.step < TOMOYO_MAX_POLICY &&
@@ -1406,95 +1706,40 @@ static void tomoyo_read_exception(struct tomoyo_io_buffer *head)
                head->r.step++;
        if (head->r.step < TOMOYO_MAX_POLICY + TOMOYO_MAX_GROUP)
                return;
+       while (head->r.step < TOMOYO_MAX_POLICY + TOMOYO_MAX_GROUP
+              + TOMOYO_MAX_ACL_GROUPS) {
+               head->r.acl_group_index = head->r.step - TOMOYO_MAX_POLICY
+                       - TOMOYO_MAX_GROUP;
+               if (!tomoyo_read_domain2(head, &ns->acl_group
+                                        [head->r.acl_group_index]))
+                       return;
+               head->r.step++;
+       }
        head->r.eof = true;
 }
 
-/**
- * tomoyo_print_header - Get header line of audit log.
- *
- * @r: Pointer to "struct tomoyo_request_info".
- *
- * Returns string representation.
- *
- * This function uses kmalloc(), so caller must kfree() if this function
- * didn't return NULL.
- */
-static char *tomoyo_print_header(struct tomoyo_request_info *r)
-{
-       struct timeval tv;
-       const pid_t gpid = task_pid_nr(current);
-       static const int tomoyo_buffer_len = 4096;
-       char *buffer = kmalloc(tomoyo_buffer_len, GFP_NOFS);
-       pid_t ppid;
-       if (!buffer)
-               return NULL;
-       do_gettimeofday(&tv);
-       rcu_read_lock();
-       ppid = task_tgid_vnr(current->real_parent);
-       rcu_read_unlock();
-       snprintf(buffer, tomoyo_buffer_len - 1,
-                "#timestamp=%lu profile=%u mode=%s (global-pid=%u)"
-                " task={ pid=%u ppid=%u uid=%u gid=%u euid=%u"
-                " egid=%u suid=%u sgid=%u fsuid=%u fsgid=%u }",
-                tv.tv_sec, r->profile, tomoyo_mode[r->mode], gpid,
-                task_tgid_vnr(current), ppid,
-                current_uid(), current_gid(), current_euid(),
-                current_egid(), current_suid(), current_sgid(),
-                current_fsuid(), current_fsgid());
-       return buffer;
-}
-
-/**
- * tomoyo_init_audit_log - Allocate buffer for audit logs.
- *
- * @len: Required size.
- * @r:   Pointer to "struct tomoyo_request_info".
- *
- * Returns pointer to allocated memory.
- *
- * The @len is updated to add the header lines' size on success.
- *
- * This function uses kzalloc(), so caller must kfree() if this function
- * didn't return NULL.
- */
-static char *tomoyo_init_audit_log(int *len, struct tomoyo_request_info *r)
-{
-       char *buf = NULL;
-       const char *header;
-       const char *domainname;
-       if (!r->domain)
-               r->domain = tomoyo_domain();
-       domainname = r->domain->domainname->name;
-       header = tomoyo_print_header(r);
-       if (!header)
-               return NULL;
-       *len += strlen(domainname) + strlen(header) + 10;
-       buf = kzalloc(*len, GFP_NOFS);
-       if (buf)
-               snprintf(buf, (*len) - 1, "%s\n%s\n", header, domainname);
-       kfree(header);
-       return buf;
-}
-
-/* Wait queue for tomoyo_query_list. */
+/* Wait queue for kernel -> userspace notification. */
 static DECLARE_WAIT_QUEUE_HEAD(tomoyo_query_wait);
-
-/* Lock for manipulating tomoyo_query_list. */
-static DEFINE_SPINLOCK(tomoyo_query_list_lock);
+/* Wait queue for userspace -> kernel notification. */
+static DECLARE_WAIT_QUEUE_HEAD(tomoyo_answer_wait);
 
 /* Structure for query. */
 struct tomoyo_query {
        struct list_head list;
        char *query;
-       int query_len;
+       size_t query_len;
        unsigned int serial;
-       int timer;
-       int answer;
+       u8 timer;
+       u8 answer;
+       u8 retry;
 };
 
 /* The list for "struct tomoyo_query". */
 static LIST_HEAD(tomoyo_query_list);
 
+/* Lock for manipulating tomoyo_query_list. */
+static DEFINE_SPINLOCK(tomoyo_query_list_lock);
+
 /*
  * Number of "struct file" referring /sys/kernel/security/tomoyo/query
  * interface.
@@ -1502,10 +1747,82 @@ static LIST_HEAD(tomoyo_query_list);
 static atomic_t tomoyo_query_observers = ATOMIC_INIT(0);
 
 /**
+ * tomoyo_truncate - Truncate a line.
+ *
+ * @str: String to truncate.
+ *
+ * Returns length of truncated @str.
+ */
+static int tomoyo_truncate(char *str)
+{
+       char *start = str;
+       while (*(unsigned char *) str > (unsigned char) ' ')
+               str++;
+       *str = '\0';
+       return strlen(start) + 1;
+}
+
+/**
+ * tomoyo_add_entry - Add an ACL to current thread's domain. Used by learning mode.
+ *
+ * @domain: Pointer to "struct tomoyo_domain_info".
+ * @header: Lines containing ACL.
+ *
+ * Returns nothing.
+ */
+static void tomoyo_add_entry(struct tomoyo_domain_info *domain, char *header)
+{
+       char *buffer;
+       char *realpath = NULL;
+       char *argv0 = NULL;
+       char *symlink = NULL;
+       char *cp = strchr(header, '\n');
+       int len;
+       if (!cp)
+               return;
+       cp = strchr(cp + 1, '\n');
+       if (!cp)
+               return;
+       *cp++ = '\0';
+       len = strlen(cp) + 1;
+       /* strstr() will return NULL if ordering is wrong. */
+       if (*cp == 'f') {
+               argv0 = strstr(header, " argv[]={ \"");
+               if (argv0) {
+                       argv0 += 10;
+                       len += tomoyo_truncate(argv0) + 14;
+               }
+               realpath = strstr(header, " exec={ realpath=\"");
+               if (realpath) {
+                       realpath += 8;
+                       len += tomoyo_truncate(realpath) + 6;
+               }
+               symlink = strstr(header, " symlink.target=\"");
+               if (symlink)
+                       len += tomoyo_truncate(symlink + 1) + 1;
+       }
+       buffer = kmalloc(len, GFP_NOFS);
+       if (!buffer)
+               return;
+       snprintf(buffer, len - 1, "%s", cp);
+       if (realpath)
+               tomoyo_addprintf(buffer, len, " exec.%s", realpath);
+       if (argv0)
+               tomoyo_addprintf(buffer, len, " exec.argv[0]=%s", argv0);
+       if (symlink)
+               tomoyo_addprintf(buffer, len, "%s", symlink);
+       tomoyo_normalize_line(buffer);
+       if (!tomoyo_write_domain2(domain->ns, &domain->acl_info_list, buffer,
+                                 false))
+               tomoyo_update_stat(TOMOYO_STAT_POLICY_UPDATES);
+       kfree(buffer);
+}
+
+/**
  * tomoyo_supervisor - Ask for the supervisor's decision.
  *
- * @r:       Pointer to "struct tomoyo_request_info".
- * @fmt:     The printf()'s format string, followed by parameters.
+ * @r:   Pointer to "struct tomoyo_request_info".
+ * @fmt: The printf()'s format string, followed by parameters.
  *
  * Returns 0 if the supervisor decided to permit the access request which
  * violated the policy in enforcing mode, TOMOYO_RETRY_REQUEST if the
@@ -1515,88 +1832,79 @@ static atomic_t tomoyo_query_observers = ATOMIC_INIT(0);
 int tomoyo_supervisor(struct tomoyo_request_info *r, const char *fmt, ...)
 {
        va_list args;
-       int error = -EPERM;
-       int pos;
+       int error;
        int len;
        static unsigned int tomoyo_serial;
-       struct tomoyo_query *entry = NULL;
+       struct tomoyo_query entry = { };
        bool quota_exceeded = false;
-       char *header;
+       va_start(args, fmt);
+       len = vsnprintf((char *) &len, 1, fmt, args) + 1;
+       va_end(args);
+       /* Write /sys/kernel/security/tomoyo/audit. */
+       va_start(args, fmt);
+       tomoyo_write_log2(r, len, fmt, args);
+       va_end(args);
+       /* Nothing more to do if granted. */
+       if (r->granted)
+               return 0;
+       if (r->mode)
+               tomoyo_update_stat(r->mode);
        switch (r->mode) {
-               char *buffer;
+       case TOMOYO_CONFIG_ENFORCING:
+               error = -EPERM;
+               if (atomic_read(&tomoyo_query_observers))
+                       break;
+               goto out;
        case TOMOYO_CONFIG_LEARNING:
-               if (!tomoyo_domain_quota_is_ok(r))
-                       return 0;
-               va_start(args, fmt);
-               len = vsnprintf((char *) &pos, sizeof(pos) - 1, fmt, args) + 4;
-               va_end(args);
-               buffer = kmalloc(len, GFP_NOFS);
-               if (!buffer)
-                       return 0;
-               va_start(args, fmt);
-               vsnprintf(buffer, len - 1, fmt, args);
-               va_end(args);
-               tomoyo_normalize_line(buffer);
-               tomoyo_write_domain2(buffer, r->domain, false);
-               kfree(buffer);
+               error = 0;
+               /* Check max_learning_entry parameter. */
+               if (tomoyo_domain_quota_is_ok(r))
+                       break;
                /* fall through */
-       case TOMOYO_CONFIG_PERMISSIVE:
+       default:
                return 0;
        }
-       if (!r->domain)
-               r->domain = tomoyo_domain();
-       if (!atomic_read(&tomoyo_query_observers))
-               return -EPERM;
+       /* Get message. */
        va_start(args, fmt);
-       len = vsnprintf((char *) &pos, sizeof(pos) - 1, fmt, args) + 32;
+       entry.query = tomoyo_init_log(r, len, fmt, args);
        va_end(args);
-       header = tomoyo_init_audit_log(&len, r);
-       if (!header)
+       if (!entry.query)
                goto out;
-       entry = kzalloc(sizeof(*entry), GFP_NOFS);
-       if (!entry)
-               goto out;
-       entry->query = kzalloc(len, GFP_NOFS);
-       if (!entry->query)
+       entry.query_len = strlen(entry.query) + 1;
+       if (!error) {
+               tomoyo_add_entry(r->domain, entry.query);
                goto out;
-       len = ksize(entry->query);
+       }
+       len = tomoyo_round2(entry.query_len);
        spin_lock(&tomoyo_query_list_lock);
-       if (tomoyo_quota_for_query && tomoyo_query_memory_size + len +
-           sizeof(*entry) >= tomoyo_quota_for_query) {
+       if (tomoyo_memory_quota[TOMOYO_MEMORY_QUERY] &&
+           tomoyo_memory_used[TOMOYO_MEMORY_QUERY] + len
+           >= tomoyo_memory_quota[TOMOYO_MEMORY_QUERY]) {
                quota_exceeded = true;
        } else {
-               tomoyo_query_memory_size += len + sizeof(*entry);
-               entry->serial = tomoyo_serial++;
+               entry.serial = tomoyo_serial++;
+               entry.retry = r->retry;
+               tomoyo_memory_used[TOMOYO_MEMORY_QUERY] += len;
+               list_add_tail(&entry.list, &tomoyo_query_list);
        }
        spin_unlock(&tomoyo_query_list_lock);
        if (quota_exceeded)
                goto out;
-       pos = snprintf(entry->query, len - 1, "Q%u-%hu\n%s",
-                      entry->serial, r->retry, header);
-       kfree(header);
-       header = NULL;
-       va_start(args, fmt);
-       vsnprintf(entry->query + pos, len - 1 - pos, fmt, args);
-       entry->query_len = strlen(entry->query) + 1;
-       va_end(args);
-       spin_lock(&tomoyo_query_list_lock);
-       list_add_tail(&entry->list, &tomoyo_query_list);
-       spin_unlock(&tomoyo_query_list_lock);
        /* Give 10 seconds for supervisor's opinion. */
-       for (entry->timer = 0;
-            atomic_read(&tomoyo_query_observers) && entry->timer < 100;
-            entry->timer++) {
-               wake_up(&tomoyo_query_wait);
-               set_current_state(TASK_INTERRUPTIBLE);
-               schedule_timeout(HZ / 10);
-               if (entry->answer)
+       while (entry.timer < 10) {
+               wake_up_all(&tomoyo_query_wait);
+               if (wait_event_interruptible_timeout
+                   (tomoyo_answer_wait, entry.answer ||
+                    !atomic_read(&tomoyo_query_observers), HZ))
                        break;
+               else
+                       entry.timer++;
        }
        spin_lock(&tomoyo_query_list_lock);
-       list_del(&entry->list);
-       tomoyo_query_memory_size -= len + sizeof(*entry);
+       list_del(&entry.list);
+       tomoyo_memory_used[TOMOYO_MEMORY_QUERY] -= len;
        spin_unlock(&tomoyo_query_list_lock);
-       switch (entry->answer) {
+       switch (entry.answer) {
        case 3: /* Asked to retry by administrator. */
                error = TOMOYO_RETRY_REQUEST;
                r->retry++;
@@ -1605,18 +1913,12 @@ int tomoyo_supervisor(struct tomoyo_request_info *r, const char *fmt, ...)
                /* Granted by administrator. */
                error = 0;
                break;
-       case 0:
-               /* Timed out. */
-               break;
        default:
-               /* Rejected by administrator. */
+               /* Timed out or rejected by administrator. */
                break;
        }
- out:
-       if (entry)
-               kfree(entry->query);
-       kfree(entry);
-       kfree(header);
+out:
+       kfree(entry.query);
        return error;
 }
 
@@ -1663,8 +1965,8 @@ static int tomoyo_poll_query(struct file *file, poll_table *wait)
 static void tomoyo_read_query(struct tomoyo_io_buffer *head)
 {
        struct list_head *tmp;
-       int pos = 0;
-       int len = 0;
+       unsigned int pos = 0;
+       size_t len = 0;
        char *buf;
        if (head->r.w_pos)
                return;
@@ -1687,7 +1989,7 @@ static void tomoyo_read_query(struct tomoyo_io_buffer *head)
                head->r.query_index = 0;
                return;
        }
-       buf = kzalloc(len, GFP_NOFS);
+       buf = kzalloc(len + 32, GFP_NOFS);
        if (!buf)
                return;
        pos = 0;
@@ -1703,7 +2005,8 @@ static void tomoyo_read_query(struct tomoyo_io_buffer *head)
                 * can change, but I don't care.
                 */
                if (len == ptr->query_len)
-                       memmove(buf, ptr->query, len);
+                       snprintf(buf, len + 31, "Q%u-%hu\n%s", ptr->serial,
+                                ptr->retry, ptr->query);
                break;
        }
        spin_unlock(&tomoyo_query_list_lock);
@@ -1760,7 +2063,7 @@ static int tomoyo_write_answer(struct tomoyo_io_buffer *head)
 static void tomoyo_read_version(struct tomoyo_io_buffer *head)
 {
        if (!head->r.eof) {
-               tomoyo_io_printf(head, "2.3.0");
+               tomoyo_io_printf(head, "2.4.0");
                head->r.eof = true;
        }
 }
@@ -1785,15 +2088,111 @@ static void tomoyo_read_self_domain(struct tomoyo_io_buffer *head)
        }
 }
 
+/* String table for /sys/kernel/security/tomoyo/stat interface. */
+static const char * const tomoyo_policy_headers[TOMOYO_MAX_POLICY_STAT] = {
+       [TOMOYO_STAT_POLICY_UPDATES]    = "update:",
+       [TOMOYO_STAT_POLICY_LEARNING]   = "violation in learning mode:",
+       [TOMOYO_STAT_POLICY_PERMISSIVE] = "violation in permissive mode:",
+       [TOMOYO_STAT_POLICY_ENFORCING]  = "violation in enforcing mode:",
+};
+
+/* String table for /sys/kernel/security/tomoyo/stat interface. */
+static const char * const tomoyo_memory_headers[TOMOYO_MAX_MEMORY_STAT] = {
+       [TOMOYO_MEMORY_POLICY] = "policy:",
+       [TOMOYO_MEMORY_AUDIT]  = "audit log:",
+       [TOMOYO_MEMORY_QUERY]  = "query message:",
+};
+
+/* Timestamp counter for last updated. */
+static unsigned int tomoyo_stat_updated[TOMOYO_MAX_POLICY_STAT];
+/* Counter for number of updates. */
+static unsigned int tomoyo_stat_modified[TOMOYO_MAX_POLICY_STAT];
+
+/**
+ * tomoyo_update_stat - Update statistic counters.
+ *
+ * @index: Index for policy type.
+ *
+ * Returns nothing.
+ */
+void tomoyo_update_stat(const u8 index)
+{
+       struct timeval tv;
+       do_gettimeofday(&tv);
+       /*
+        * I don't use atomic operations because race condition is not fatal.
+        */
+       tomoyo_stat_updated[index]++;
+       tomoyo_stat_modified[index] = tv.tv_sec;
+}
+
+/**
+ * tomoyo_read_stat - Read statistic data.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns nothing.
+ */
+static void tomoyo_read_stat(struct tomoyo_io_buffer *head)
+{
+       u8 i;
+       unsigned int total = 0;
+       if (head->r.eof)
+               return;
+       for (i = 0; i < TOMOYO_MAX_POLICY_STAT; i++) {
+               tomoyo_io_printf(head, "Policy %-30s %10u",
+                                tomoyo_policy_headers[i],
+                                tomoyo_stat_updated[i]);
+               if (tomoyo_stat_modified[i]) {
+                       struct tomoyo_time stamp;
+                       tomoyo_convert_time(tomoyo_stat_modified[i], &stamp);
+                       tomoyo_io_printf(head, " (Last: %04u/%02u/%02u "
+                                        "%02u:%02u:%02u)",
+                                        stamp.year, stamp.month, stamp.day,
+                                        stamp.hour, stamp.min, stamp.sec);
+               }
+               tomoyo_set_lf(head);
+       }
+       for (i = 0; i < TOMOYO_MAX_MEMORY_STAT; i++) {
+               unsigned int used = tomoyo_memory_used[i];
+               total += used;
+               tomoyo_io_printf(head, "Memory used by %-22s %10u",
+                                tomoyo_memory_headers[i], used);
+               used = tomoyo_memory_quota[i];
+               if (used)
+                       tomoyo_io_printf(head, " (Quota: %10u)", used);
+               tomoyo_set_lf(head);
+       }
+       tomoyo_io_printf(head, "Total memory used:                    %10u\n",
+                        total);
+       head->r.eof = true;
+}
+
+/**
+ * tomoyo_write_stat - Set memory quota.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns 0.
+ */
+static int tomoyo_write_stat(struct tomoyo_io_buffer *head)
+{
+       char *data = head->write_buf;
+       u8 i;
+       if (tomoyo_str_starts(&data, "Memory used by "))
+               for (i = 0; i < TOMOYO_MAX_MEMORY_STAT; i++)
+                       if (tomoyo_str_starts(&data, tomoyo_memory_headers[i]))
+                               sscanf(data, "%u", &tomoyo_memory_quota[i]);
+       return 0;
+}
+
 /**
  * tomoyo_open_control - open() for /sys/kernel/security/tomoyo/ interface.
  *
  * @type: Type of interface.
  * @file: Pointer to "struct file".
  *
- * Associates policy handler and returns 0 on success, -ENOMEM otherwise.
- *
- * Caller acquires tomoyo_read_lock().
+ * Returns 0 on success, negative value otherwise.
  */
 int tomoyo_open_control(const u8 type, struct file *file)
 {
@@ -1814,15 +2213,15 @@ int tomoyo_open_control(const u8 type, struct file *file)
                head->write = tomoyo_write_exception;
                head->read = tomoyo_read_exception;
                break;
+       case TOMOYO_AUDIT:
+               /* /sys/kernel/security/tomoyo/audit */
+               head->poll = tomoyo_poll_log;
+               head->read = tomoyo_read_log;
+               break;
        case TOMOYO_SELFDOMAIN:
                /* /sys/kernel/security/tomoyo/self_domain */
                head->read = tomoyo_read_self_domain;
                break;
-       case TOMOYO_DOMAIN_STATUS:
-               /* /sys/kernel/security/tomoyo/.domain_status */
-               head->write = tomoyo_write_domain_profile;
-               head->read = tomoyo_read_domain_profile;
-               break;
        case TOMOYO_PROCESS_STATUS:
                /* /sys/kernel/security/tomoyo/.process_status */
                head->write = tomoyo_write_pid;
@@ -1833,11 +2232,11 @@ int tomoyo_open_control(const u8 type, struct file *file)
                head->read = tomoyo_read_version;
                head->readbuf_size = 128;
                break;
-       case TOMOYO_MEMINFO:
-               /* /sys/kernel/security/tomoyo/meminfo */
-               head->write = tomoyo_write_memory_quota;
-               head->read = tomoyo_read_memory_counter;
-               head->readbuf_size = 512;
+       case TOMOYO_STAT:
+               /* /sys/kernel/security/tomoyo/stat */
+               head->write = tomoyo_write_stat;
+               head->read = tomoyo_read_stat;
+               head->readbuf_size = 1024;
                break;
        case TOMOYO_PROFILE:
                /* /sys/kernel/security/tomoyo/profile */
@@ -1887,26 +2286,16 @@ int tomoyo_open_control(const u8 type, struct file *file)
                        return -ENOMEM;
                }
        }
-       if (type != TOMOYO_QUERY)
-               head->reader_idx = tomoyo_read_lock();
-       file->private_data = head;
-       /*
-        * Call the handler now if the file is
-        * /sys/kernel/security/tomoyo/self_domain
-        * so that the user can use
-        * cat < /sys/kernel/security/tomoyo/self_domain"
-        * to know the current process's domainname.
-        */
-       if (type == TOMOYO_SELFDOMAIN)
-               tomoyo_read_control(file, NULL, 0);
        /*
         * If the file is /sys/kernel/security/tomoyo/query , increment the
         * observer counter.
         * The obserber counter is used by tomoyo_supervisor() to see if
         * there is some process monitoring /sys/kernel/security/tomoyo/query.
         */
-       else if (type == TOMOYO_QUERY)
+       if (type == TOMOYO_QUERY)
                atomic_inc(&tomoyo_query_observers);
+       file->private_data = head;
+       tomoyo_notify_gc(head, true);
        return 0;
 }
 
@@ -1917,7 +2306,8 @@ int tomoyo_open_control(const u8 type, struct file *file)
  * @wait: Pointer to "poll_table".
  *
  * Waits for read readiness.
- * /sys/kernel/security/tomoyo/query is handled by /usr/sbin/tomoyo-queryd .
+ * /sys/kernel/security/tomoyo/query is handled by /usr/sbin/tomoyo-queryd and
+ * /sys/kernel/security/tomoyo/audit is handled by /usr/sbin/tomoyo-auditd.
  */
 int tomoyo_poll_control(struct file *file, poll_table *wait)
 {
@@ -1928,21 +2318,58 @@ int tomoyo_poll_control(struct file *file, poll_table *wait)
 }
 
 /**
+ * tomoyo_set_namespace_cursor - Set namespace to read.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns nothing.
+ */
+static inline void tomoyo_set_namespace_cursor(struct tomoyo_io_buffer *head)
+{
+       struct list_head *ns;
+       if (head->type != TOMOYO_EXCEPTIONPOLICY &&
+           head->type != TOMOYO_PROFILE)
+               return;
+       /*
+        * If this is the first read, or reading previous namespace finished
+        * and has more namespaces to read, update the namespace cursor.
+        */
+       ns = head->r.ns;
+       if (!ns || (head->r.eof && ns->next != &tomoyo_namespace_list)) {
+               /* Clearing is OK because tomoyo_flush() returned true. */
+               memset(&head->r, 0, sizeof(head->r));
+               head->r.ns = ns ? ns->next : tomoyo_namespace_list.next;
+       }
+}
+
+/**
+ * tomoyo_has_more_namespace - Check for unread namespaces.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns true if we have more entries to print, false otherwise.
+ */
+static inline bool tomoyo_has_more_namespace(struct tomoyo_io_buffer *head)
+{
+       return (head->type == TOMOYO_EXCEPTIONPOLICY ||
+               head->type == TOMOYO_PROFILE) && head->r.eof &&
+               head->r.ns->next != &tomoyo_namespace_list;
+}
+
+/**
  * tomoyo_read_control - read() for /sys/kernel/security/tomoyo/ interface.
  *
- * @file:       Pointer to "struct file".
+ * @head:       Pointer to "struct tomoyo_io_buffer".
  * @buffer:     Poiner to buffer to write to.
  * @buffer_len: Size of @buffer.
  *
  * Returns bytes read on success, negative value otherwise.
- *
- * Caller holds tomoyo_read_lock().
  */
-int tomoyo_read_control(struct file *file, char __user *buffer,
-                       const int buffer_len)
+ssize_t tomoyo_read_control(struct tomoyo_io_buffer *head, char __user *buffer,
+                           const int buffer_len)
 {
        int len;
-       struct tomoyo_io_buffer *head = file->private_data;
+       int idx;
 
        if (!head->read)
                return -ENOSYS;
@@ -1950,64 +2377,156 @@ int tomoyo_read_control(struct file *file, char __user *buffer,
                return -EINTR;
        head->read_user_buf = buffer;
        head->read_user_buf_avail = buffer_len;
+       idx = tomoyo_read_lock();
        if (tomoyo_flush(head))
                /* Call the policy handler. */
-               head->read(head);
-       tomoyo_flush(head);
+               do {
+                       tomoyo_set_namespace_cursor(head);
+                       head->read(head);
+               } while (tomoyo_flush(head) &&
+                        tomoyo_has_more_namespace(head));
+       tomoyo_read_unlock(idx);
        len = head->read_user_buf - buffer;
        mutex_unlock(&head->io_sem);
        return len;
 }
 
 /**
+ * tomoyo_parse_policy - Parse a policy line.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ * @line: Line to parse.
+ *
+ * Returns 0 on success, negative value otherwise.
+ *
+ * Caller holds tomoyo_read_lock().
+ */
+static int tomoyo_parse_policy(struct tomoyo_io_buffer *head, char *line)
+{
+       /* Delete request? */
+       head->w.is_delete = !strncmp(line, "delete ", 7);
+       if (head->w.is_delete)
+               memmove(line, line + 7, strlen(line + 7) + 1);
+       /* Selecting namespace to update. */
+       if (head->type == TOMOYO_EXCEPTIONPOLICY ||
+           head->type == TOMOYO_PROFILE) {
+               if (*line == '<') {
+                       char *cp = strchr(line, ' ');
+                       if (cp) {
+                               *cp++ = '\0';
+                               head->w.ns = tomoyo_assign_namespace(line);
+                               memmove(line, cp, strlen(cp) + 1);
+                       } else
+                               head->w.ns = NULL;
+               } else
+                       head->w.ns = &tomoyo_kernel_namespace;
+               /* Don't allow updating if namespace is invalid. */
+               if (!head->w.ns)
+                       return -ENOENT;
+       }
+       /* Do the update. */
+       return head->write(head);
+}
+
+/**
  * tomoyo_write_control - write() for /sys/kernel/security/tomoyo/ interface.
  *
- * @file:       Pointer to "struct file".
+ * @head:       Pointer to "struct tomoyo_io_buffer".
  * @buffer:     Pointer to buffer to read from.
  * @buffer_len: Size of @buffer.
  *
  * Returns @buffer_len on success, negative value otherwise.
- *
- * Caller holds tomoyo_read_lock().
  */
-int tomoyo_write_control(struct file *file, const char __user *buffer,
-                        const int buffer_len)
+ssize_t tomoyo_write_control(struct tomoyo_io_buffer *head,
+                            const char __user *buffer, const int buffer_len)
 {
-       struct tomoyo_io_buffer *head = file->private_data;
        int error = buffer_len;
-       int avail_len = buffer_len;
+       size_t avail_len = buffer_len;
        char *cp0 = head->write_buf;
-
+       int idx;
        if (!head->write)
                return -ENOSYS;
        if (!access_ok(VERIFY_READ, buffer, buffer_len))
                return -EFAULT;
-       /* Don't allow updating policies by non manager programs. */
-       if (head->write != tomoyo_write_pid &&
-           head->write != tomoyo_write_domain && !tomoyo_manager())
-               return -EPERM;
        if (mutex_lock_interruptible(&head->io_sem))
                return -EINTR;
+       idx = tomoyo_read_lock();
        /* Read a line and dispatch it to the policy handler. */
        while (avail_len > 0) {
                char c;
-               if (head->write_avail >= head->writebuf_size - 1) {
-                       error = -ENOMEM;
-                       break;
-               } else if (get_user(c, buffer)) {
+               if (head->w.avail >= head->writebuf_size - 1) {
+                       const int len = head->writebuf_size * 2;
+                       char *cp = kzalloc(len, GFP_NOFS);
+                       if (!cp) {
+                               error = -ENOMEM;
+                               break;
+                       }
+                       memmove(cp, cp0, head->w.avail);
+                       kfree(cp0);
+                       head->write_buf = cp;
+                       cp0 = cp;
+                       head->writebuf_size = len;
+               }
+               if (get_user(c, buffer)) {
                        error = -EFAULT;
                        break;
                }
                buffer++;
                avail_len--;
-               cp0[head->write_avail++] = c;
+               cp0[head->w.avail++] = c;
                if (c != '\n')
                        continue;
-               cp0[head->write_avail - 1] = '\0';
-               head->write_avail = 0;
+               cp0[head->w.avail - 1] = '\0';
+               head->w.avail = 0;
                tomoyo_normalize_line(cp0);
-               head->write(head);
+               if (!strcmp(cp0, "reset")) {
+                       head->w.ns = &tomoyo_kernel_namespace;
+                       head->w.domain = NULL;
+                       memset(&head->r, 0, sizeof(head->r));
+                       continue;
+               }
+               /* Don't allow updating policies by non manager programs. */
+               switch (head->type) {
+               case TOMOYO_PROCESS_STATUS:
+                       /* This does not write anything. */
+                       break;
+               case TOMOYO_DOMAINPOLICY:
+                       if (tomoyo_select_domain(head, cp0))
+                               continue;
+                       /* fall through */
+               case TOMOYO_EXCEPTIONPOLICY:
+                       if (!strcmp(cp0, "select transition_only")) {
+                               head->r.print_transition_related_only = true;
+                               continue;
+                       }
+                       /* fall through */
+               default:
+                       if (!tomoyo_manager()) {
+                               error = -EPERM;
+                               goto out;
+                       }
+               }
+               switch (tomoyo_parse_policy(head, cp0)) {
+               case -EPERM:
+                       error = -EPERM;
+                       goto out;
+               case 0:
+                       switch (head->type) {
+                       case TOMOYO_DOMAINPOLICY:
+                       case TOMOYO_EXCEPTIONPOLICY:
+                       case TOMOYO_STAT:
+                       case TOMOYO_PROFILE:
+                       case TOMOYO_MANAGER:
+                               tomoyo_update_stat(TOMOYO_STAT_POLICY_UPDATES);
+                               break;
+                       default:
+                               break;
+                       }
+                       break;
+               }
        }
+out:
+       tomoyo_read_unlock(idx);
        mutex_unlock(&head->io_sem);
        return error;
 }
@@ -2015,35 +2534,20 @@ int tomoyo_write_control(struct file *file, const char __user *buffer,
 /**
  * tomoyo_close_control - close() for /sys/kernel/security/tomoyo/ interface.
  *
- * @file: Pointer to "struct file".
- *
- * Releases memory and returns 0.
+ * @head: Pointer to "struct tomoyo_io_buffer".
  *
- * Caller looses tomoyo_read_lock().
+ * Returns 0.
  */
-int tomoyo_close_control(struct file *file)
+int tomoyo_close_control(struct tomoyo_io_buffer *head)
 {
-       struct tomoyo_io_buffer *head = file->private_data;
-       const bool is_write = !!head->write_buf;
-
        /*
         * If the file is /sys/kernel/security/tomoyo/query , decrement the
         * observer counter.
         */
-       if (head->type == TOMOYO_QUERY)
-               atomic_dec(&tomoyo_query_observers);
-       else
-               tomoyo_read_unlock(head->reader_idx);
-       /* Release memory used for policy I/O. */
-       kfree(head->read_buf);
-       head->read_buf = NULL;
-       kfree(head->write_buf);
-       head->write_buf = NULL;
-       kfree(head);
-       head = NULL;
-       file->private_data = NULL;
-       if (is_write)
-               tomoyo_run_gc();
+       if (head->type == TOMOYO_QUERY &&
+           atomic_dec_and_test(&tomoyo_query_observers))
+               wake_up_all(&tomoyo_answer_wait);
+       tomoyo_notify_gc(head, false);
        return 0;
 }
 
@@ -2055,27 +2559,90 @@ void tomoyo_check_profile(void)
        struct tomoyo_domain_info *domain;
        const int idx = tomoyo_read_lock();
        tomoyo_policy_loaded = true;
-       /* Check all profiles currently assigned to domains are defined. */
+       printk(KERN_INFO "TOMOYO: 2.4.0\n");
        list_for_each_entry_rcu(domain, &tomoyo_domain_list, list) {
                const u8 profile = domain->profile;
-               if (tomoyo_profile_ptr[profile])
+               const struct tomoyo_policy_namespace *ns = domain->ns;
+               if (ns->profile_version != 20100903)
+                       printk(KERN_ERR
+                              "Profile version %u is not supported.\n",
+                              ns->profile_version);
+               else if (!ns->profile_ptr[profile])
+                       printk(KERN_ERR
+                              "Profile %u (used by '%s') is not defined.\n",
+                              profile, domain->domainname->name);
+               else
                        continue;
-               printk(KERN_ERR "You need to define profile %u before using it.\n",
-                      profile);
-               printk(KERN_ERR "Please see http://tomoyo.sourceforge.jp/2.3/ "
+               printk(KERN_ERR
+                      "Userland tools for TOMOYO 2.4 must be installed and "
+                      "policy must be initialized.\n");
+               printk(KERN_ERR "Please see http://tomoyo.sourceforge.jp/2.4/ "
                       "for more information.\n");
-               panic("Profile %u (used by '%s') not defined.\n",
-                     profile, domain->domainname->name);
+               panic("STOP!");
        }
        tomoyo_read_unlock(idx);
-       if (tomoyo_profile_version != 20090903) {
-               printk(KERN_ERR "You need to install userland programs for "
-                      "TOMOYO 2.3 and initialize policy configuration.\n");
-               printk(KERN_ERR "Please see http://tomoyo.sourceforge.jp/2.3/ "
-                      "for more information.\n");
-               panic("Profile version %u is not supported.\n",
-                     tomoyo_profile_version);
-       }
-       printk(KERN_INFO "TOMOYO: 2.3.0\n");
        printk(KERN_INFO "Mandatory Access Control activated.\n");
 }
+
+/**
+ * tomoyo_load_builtin_policy - Load built-in policy.
+ *
+ * Returns nothing.
+ */
+void __init tomoyo_load_builtin_policy(void)
+{
+       /*
+        * This include file is manually created and contains built-in policy
+        * named "tomoyo_builtin_profile", "tomoyo_builtin_exception_policy",
+        * "tomoyo_builtin_domain_policy", "tomoyo_builtin_manager",
+        * "tomoyo_builtin_stat" in the form of "static char [] __initdata".
+        */
+#include "builtin-policy.h"
+       u8 i;
+       const int idx = tomoyo_read_lock();
+       for (i = 0; i < 5; i++) {
+               struct tomoyo_io_buffer head = { };
+               char *start = "";
+               switch (i) {
+               case 0:
+                       start = tomoyo_builtin_profile;
+                       head.type = TOMOYO_PROFILE;
+                       head.write = tomoyo_write_profile;
+                       break;
+               case 1:
+                       start = tomoyo_builtin_exception_policy;
+                       head.type = TOMOYO_EXCEPTIONPOLICY;
+                       head.write = tomoyo_write_exception;
+                       break;
+               case 2:
+                       start = tomoyo_builtin_domain_policy;
+                       head.type = TOMOYO_DOMAINPOLICY;
+                       head.write = tomoyo_write_domain;
+                       break;
+               case 3:
+                       start = tomoyo_builtin_manager;
+                       head.type = TOMOYO_MANAGER;
+                       head.write = tomoyo_write_manager;
+                       break;
+               case 4:
+                       start = tomoyo_builtin_stat;
+                       head.type = TOMOYO_STAT;
+                       head.write = tomoyo_write_stat;
+                       break;
+               }
+               while (1) {
+                       char *end = strchr(start, '\n');
+                       if (!end)
+                               break;
+                       *end = '\0';
+                       tomoyo_normalize_line(start);
+                       head.write_buf = start;
+                       tomoyo_parse_policy(&head, start);
+                       start = end + 1;
+               }
+       }
+       tomoyo_read_unlock(idx);
+#ifdef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER
+       tomoyo_check_profile();
+#endif
+}
index 7c66bd8..f7fbaa6 100644 (file)
@@ -21,7 +21,8 @@
 #include <linux/list.h>
 #include <linux/cred.h>
 #include <linux/poll.h>
-struct linux_binprm;
+#include <linux/binfmts.h>
+#include <linux/highmem.h>
 
 /********** Constants definitions. **********/
 
@@ -38,66 +39,149 @@ struct linux_binprm;
 /* Profile number is an integer between 0 and 255. */
 #define TOMOYO_MAX_PROFILES 256
 
+/* Group number is an integer between 0 and 255. */
+#define TOMOYO_MAX_ACL_GROUPS 256
+
+/* Index numbers for "struct tomoyo_condition". */
+enum tomoyo_conditions_index {
+       TOMOYO_TASK_UID,             /* current_uid()   */
+       TOMOYO_TASK_EUID,            /* current_euid()  */
+       TOMOYO_TASK_SUID,            /* current_suid()  */
+       TOMOYO_TASK_FSUID,           /* current_fsuid() */
+       TOMOYO_TASK_GID,             /* current_gid()   */
+       TOMOYO_TASK_EGID,            /* current_egid()  */
+       TOMOYO_TASK_SGID,            /* current_sgid()  */
+       TOMOYO_TASK_FSGID,           /* current_fsgid() */
+       TOMOYO_TASK_PID,             /* sys_getpid()   */
+       TOMOYO_TASK_PPID,            /* sys_getppid()  */
+       TOMOYO_EXEC_ARGC,            /* "struct linux_binprm *"->argc */
+       TOMOYO_EXEC_ENVC,            /* "struct linux_binprm *"->envc */
+       TOMOYO_TYPE_IS_SOCKET,       /* S_IFSOCK */
+       TOMOYO_TYPE_IS_SYMLINK,      /* S_IFLNK */
+       TOMOYO_TYPE_IS_FILE,         /* S_IFREG */
+       TOMOYO_TYPE_IS_BLOCK_DEV,    /* S_IFBLK */
+       TOMOYO_TYPE_IS_DIRECTORY,    /* S_IFDIR */
+       TOMOYO_TYPE_IS_CHAR_DEV,     /* S_IFCHR */
+       TOMOYO_TYPE_IS_FIFO,         /* S_IFIFO */
+       TOMOYO_MODE_SETUID,          /* S_ISUID */
+       TOMOYO_MODE_SETGID,          /* S_ISGID */
+       TOMOYO_MODE_STICKY,          /* S_ISVTX */
+       TOMOYO_MODE_OWNER_READ,      /* S_IRUSR */
+       TOMOYO_MODE_OWNER_WRITE,     /* S_IWUSR */
+       TOMOYO_MODE_OWNER_EXECUTE,   /* S_IXUSR */
+       TOMOYO_MODE_GROUP_READ,      /* S_IRGRP */
+       TOMOYO_MODE_GROUP_WRITE,     /* S_IWGRP */
+       TOMOYO_MODE_GROUP_EXECUTE,   /* S_IXGRP */
+       TOMOYO_MODE_OTHERS_READ,     /* S_IROTH */
+       TOMOYO_MODE_OTHERS_WRITE,    /* S_IWOTH */
+       TOMOYO_MODE_OTHERS_EXECUTE,  /* S_IXOTH */
+       TOMOYO_EXEC_REALPATH,
+       TOMOYO_SYMLINK_TARGET,
+       TOMOYO_PATH1_UID,
+       TOMOYO_PATH1_GID,
+       TOMOYO_PATH1_INO,
+       TOMOYO_PATH1_MAJOR,
+       TOMOYO_PATH1_MINOR,
+       TOMOYO_PATH1_PERM,
+       TOMOYO_PATH1_TYPE,
+       TOMOYO_PATH1_DEV_MAJOR,
+       TOMOYO_PATH1_DEV_MINOR,
+       TOMOYO_PATH2_UID,
+       TOMOYO_PATH2_GID,
+       TOMOYO_PATH2_INO,
+       TOMOYO_PATH2_MAJOR,
+       TOMOYO_PATH2_MINOR,
+       TOMOYO_PATH2_PERM,
+       TOMOYO_PATH2_TYPE,
+       TOMOYO_PATH2_DEV_MAJOR,
+       TOMOYO_PATH2_DEV_MINOR,
+       TOMOYO_PATH1_PARENT_UID,
+       TOMOYO_PATH1_PARENT_GID,
+       TOMOYO_PATH1_PARENT_INO,
+       TOMOYO_PATH1_PARENT_PERM,
+       TOMOYO_PATH2_PARENT_UID,
+       TOMOYO_PATH2_PARENT_GID,
+       TOMOYO_PATH2_PARENT_INO,
+       TOMOYO_PATH2_PARENT_PERM,
+       TOMOYO_MAX_CONDITION_KEYWORD,
+       TOMOYO_NUMBER_UNION,
+       TOMOYO_NAME_UNION,
+       TOMOYO_ARGV_ENTRY,
+       TOMOYO_ENVP_ENTRY,
+};
+
+
+/* Index numbers for stat(). */
+enum tomoyo_path_stat_index {
+       /* Do not change this order. */
+       TOMOYO_PATH1,
+       TOMOYO_PATH1_PARENT,
+       TOMOYO_PATH2,
+       TOMOYO_PATH2_PARENT,
+       TOMOYO_MAX_PATH_STAT
+};
+
+/* Index numbers for operation mode. */
 enum tomoyo_mode_index {
        TOMOYO_CONFIG_DISABLED,
        TOMOYO_CONFIG_LEARNING,
        TOMOYO_CONFIG_PERMISSIVE,
        TOMOYO_CONFIG_ENFORCING,
-       TOMOYO_CONFIG_USE_DEFAULT = 255
+       TOMOYO_CONFIG_MAX_MODE,
+       TOMOYO_CONFIG_WANT_REJECT_LOG =  64,
+       TOMOYO_CONFIG_WANT_GRANT_LOG  = 128,
+       TOMOYO_CONFIG_USE_DEFAULT     = 255,
 };
 
+/* Index numbers for entry type. */
 enum tomoyo_policy_id {
        TOMOYO_ID_GROUP,
        TOMOYO_ID_PATH_GROUP,
        TOMOYO_ID_NUMBER_GROUP,
        TOMOYO_ID_TRANSITION_CONTROL,
        TOMOYO_ID_AGGREGATOR,
-       TOMOYO_ID_GLOBALLY_READABLE,
-       TOMOYO_ID_PATTERN,
-       TOMOYO_ID_NO_REWRITE,
        TOMOYO_ID_MANAGER,
+       TOMOYO_ID_CONDITION,
        TOMOYO_ID_NAME,
        TOMOYO_ID_ACL,
        TOMOYO_ID_DOMAIN,
        TOMOYO_MAX_POLICY
 };
 
+/* Index numbers for domain's attributes. */
+enum tomoyo_domain_info_flags_index {
+       /* Quota warning flag.   */
+       TOMOYO_DIF_QUOTA_WARNED,
+       /*
+        * This domain was unable to create a new domain at
+        * tomoyo_find_next_domain() because the name of the domain to be
+        * created was too long or it could not allocate memory.
+        * More than one process continued execve() without domain transition.
+        */
+       TOMOYO_DIF_TRANSITION_FAILED,
+       TOMOYO_MAX_DOMAIN_INFO_FLAGS
+};
+
+/* Index numbers for group entries. */
 enum tomoyo_group_id {
        TOMOYO_PATH_GROUP,
        TOMOYO_NUMBER_GROUP,
        TOMOYO_MAX_GROUP
 };
 
-/* Keywords for ACLs. */
-#define TOMOYO_KEYWORD_AGGREGATOR                "aggregator "
-#define TOMOYO_KEYWORD_ALLOW_MOUNT               "allow_mount "
-#define TOMOYO_KEYWORD_ALLOW_READ                "allow_read "
-#define TOMOYO_KEYWORD_DELETE                    "delete "
-#define TOMOYO_KEYWORD_DENY_REWRITE              "deny_rewrite "
-#define TOMOYO_KEYWORD_FILE_PATTERN              "file_pattern "
-#define TOMOYO_KEYWORD_INITIALIZE_DOMAIN         "initialize_domain "
-#define TOMOYO_KEYWORD_KEEP_DOMAIN               "keep_domain "
-#define TOMOYO_KEYWORD_NO_INITIALIZE_DOMAIN      "no_initialize_domain "
-#define TOMOYO_KEYWORD_NO_KEEP_DOMAIN            "no_keep_domain "
-#define TOMOYO_KEYWORD_PATH_GROUP                "path_group "
-#define TOMOYO_KEYWORD_NUMBER_GROUP              "number_group "
-#define TOMOYO_KEYWORD_SELECT                    "select "
-#define TOMOYO_KEYWORD_USE_PROFILE               "use_profile "
-#define TOMOYO_KEYWORD_IGNORE_GLOBAL_ALLOW_READ  "ignore_global_allow_read"
-#define TOMOYO_KEYWORD_QUOTA_EXCEEDED            "quota_exceeded"
-#define TOMOYO_KEYWORD_TRANSITION_FAILED         "transition_failed"
-/* A domain definition starts with <kernel>. */
-#define TOMOYO_ROOT_NAME                         "<kernel>"
-#define TOMOYO_ROOT_NAME_LEN                     (sizeof(TOMOYO_ROOT_NAME) - 1)
-
-/* Value type definition. */
-#define TOMOYO_VALUE_TYPE_INVALID     0
-#define TOMOYO_VALUE_TYPE_DECIMAL     1
-#define TOMOYO_VALUE_TYPE_OCTAL       2
-#define TOMOYO_VALUE_TYPE_HEXADECIMAL 3
+/* Index numbers for type of numeric values. */
+enum tomoyo_value_type {
+       TOMOYO_VALUE_TYPE_INVALID,
+       TOMOYO_VALUE_TYPE_DECIMAL,
+       TOMOYO_VALUE_TYPE_OCTAL,
+       TOMOYO_VALUE_TYPE_HEXADECIMAL,
+};
 
+/* Index numbers for domain transition control keywords. */
 enum tomoyo_transition_type {
        /* Do not change this order, */
+       TOMOYO_TRANSITION_CONTROL_NO_RESET,
+       TOMOYO_TRANSITION_CONTROL_RESET,
        TOMOYO_TRANSITION_CONTROL_NO_INITIALIZE,
        TOMOYO_TRANSITION_CONTROL_INITIALIZE,
        TOMOYO_TRANSITION_CONTROL_NO_KEEP,
@@ -114,35 +198,29 @@ enum tomoyo_acl_entry_type_index {
        TOMOYO_TYPE_MOUNT_ACL,
 };
 
-/* Index numbers for File Controls. */
-
-/*
- * TOMOYO_TYPE_READ_WRITE is special. TOMOYO_TYPE_READ_WRITE is automatically
- * set if both TOMOYO_TYPE_READ and TOMOYO_TYPE_WRITE are set.
- * Both TOMOYO_TYPE_READ and TOMOYO_TYPE_WRITE are automatically set if
- * TOMOYO_TYPE_READ_WRITE is set.
- * TOMOYO_TYPE_READ_WRITE is automatically cleared if either TOMOYO_TYPE_READ
- * or TOMOYO_TYPE_WRITE is cleared.
- * Both TOMOYO_TYPE_READ and TOMOYO_TYPE_WRITE are automatically cleared if
- * TOMOYO_TYPE_READ_WRITE is cleared.
- */
-
+/* Index numbers for access controls with one pathname. */
 enum tomoyo_path_acl_index {
-       TOMOYO_TYPE_READ_WRITE,
        TOMOYO_TYPE_EXECUTE,
        TOMOYO_TYPE_READ,
        TOMOYO_TYPE_WRITE,
+       TOMOYO_TYPE_APPEND,
        TOMOYO_TYPE_UNLINK,
+       TOMOYO_TYPE_GETATTR,
        TOMOYO_TYPE_RMDIR,
        TOMOYO_TYPE_TRUNCATE,
        TOMOYO_TYPE_SYMLINK,
-       TOMOYO_TYPE_REWRITE,
        TOMOYO_TYPE_CHROOT,
        TOMOYO_TYPE_UMOUNT,
        TOMOYO_MAX_PATH_OPERATION
 };
 
-#define TOMOYO_RW_MASK ((1 << TOMOYO_TYPE_READ) | (1 << TOMOYO_TYPE_WRITE))
+/* Index numbers for /sys/kernel/security/tomoyo/stat interface. */
+enum tomoyo_memory_stat_type {
+       TOMOYO_MEMORY_POLICY,
+       TOMOYO_MEMORY_AUDIT,
+       TOMOYO_MEMORY_QUERY,
+       TOMOYO_MAX_MEMORY_STAT
+};
 
 enum tomoyo_mkdev_acl_index {
        TOMOYO_TYPE_MKBLOCK,
@@ -150,6 +228,7 @@ enum tomoyo_mkdev_acl_index {
        TOMOYO_MAX_MKDEV_OPERATION
 };
 
+/* Index numbers for access controls with two pathnames. */
 enum tomoyo_path2_acl_index {
        TOMOYO_TYPE_LINK,
        TOMOYO_TYPE_RENAME,
@@ -157,6 +236,7 @@ enum tomoyo_path2_acl_index {
        TOMOYO_MAX_PATH2_OPERATION
 };
 
+/* Index numbers for access controls with one pathname and one number. */
 enum tomoyo_path_number_acl_index {
        TOMOYO_TYPE_CREATE,
        TOMOYO_TYPE_MKDIR,
@@ -169,31 +249,45 @@ enum tomoyo_path_number_acl_index {
        TOMOYO_MAX_PATH_NUMBER_OPERATION
 };
 
+/* Index numbers for /sys/kernel/security/tomoyo/ interfaces. */
 enum tomoyo_securityfs_interface_index {
        TOMOYO_DOMAINPOLICY,
        TOMOYO_EXCEPTIONPOLICY,
-       TOMOYO_DOMAIN_STATUS,
        TOMOYO_PROCESS_STATUS,
-       TOMOYO_MEMINFO,
+       TOMOYO_STAT,
        TOMOYO_SELFDOMAIN,
+       TOMOYO_AUDIT,
        TOMOYO_VERSION,
        TOMOYO_PROFILE,
        TOMOYO_QUERY,
        TOMOYO_MANAGER
 };
 
+/* Index numbers for special mount operations. */
+enum tomoyo_special_mount {
+       TOMOYO_MOUNT_BIND,            /* mount --bind /source /dest   */
+       TOMOYO_MOUNT_MOVE,            /* mount --move /old /new       */
+       TOMOYO_MOUNT_REMOUNT,         /* mount -o remount /dir        */
+       TOMOYO_MOUNT_MAKE_UNBINDABLE, /* mount --make-unbindable /dir */
+       TOMOYO_MOUNT_MAKE_PRIVATE,    /* mount --make-private /dir    */
+       TOMOYO_MOUNT_MAKE_SLAVE,      /* mount --make-slave /dir      */
+       TOMOYO_MOUNT_MAKE_SHARED,     /* mount --make-shared /dir     */
+       TOMOYO_MAX_SPECIAL_MOUNT
+};
+
+/* Index numbers for functionality. */
 enum tomoyo_mac_index {
        TOMOYO_MAC_FILE_EXECUTE,
        TOMOYO_MAC_FILE_OPEN,
        TOMOYO_MAC_FILE_CREATE,
        TOMOYO_MAC_FILE_UNLINK,
+       TOMOYO_MAC_FILE_GETATTR,
        TOMOYO_MAC_FILE_MKDIR,
        TOMOYO_MAC_FILE_RMDIR,
        TOMOYO_MAC_FILE_MKFIFO,
        TOMOYO_MAC_FILE_MKSOCK,
        TOMOYO_MAC_FILE_TRUNCATE,
        TOMOYO_MAC_FILE_SYMLINK,
-       TOMOYO_MAC_FILE_REWRITE,
        TOMOYO_MAC_FILE_MKBLOCK,
        TOMOYO_MAC_FILE_MKCHAR,
        TOMOYO_MAC_FILE_LINK,
@@ -209,38 +303,66 @@ enum tomoyo_mac_index {
        TOMOYO_MAX_MAC_INDEX
 };
 
+/* Index numbers for category of functionality. */
 enum tomoyo_mac_category_index {
        TOMOYO_MAC_CATEGORY_FILE,
        TOMOYO_MAX_MAC_CATEGORY_INDEX
 };
 
-#define TOMOYO_RETRY_REQUEST 1 /* Retry this request. */
-
-/********** Structure definitions. **********/
-
 /*
- * tomoyo_acl_head is a structure which is used for holding elements not in
- * domain policy.
- * It has following fields.
+ * Retry this request. Returned by tomoyo_supervisor() if policy violation has
+ * occurred in enforcing mode and the userspace daemon decided to retry.
  *
- *  (1) "list" which is linked to tomoyo_policy_list[] .
- *  (2) "is_deleted" is a bool which is true if marked as deleted, false
- *      otherwise.
+ * We must choose a positive value in order to distinguish "granted" (which is
+ * 0) and "rejected" (which is a negative value) and "retry".
  */
+#define TOMOYO_RETRY_REQUEST 1
+
+/* Index numbers for /sys/kernel/security/tomoyo/stat interface. */
+enum tomoyo_policy_stat_type {
+       /* Do not change this order. */
+       TOMOYO_STAT_POLICY_UPDATES,
+       TOMOYO_STAT_POLICY_LEARNING,   /* == TOMOYO_CONFIG_LEARNING */
+       TOMOYO_STAT_POLICY_PERMISSIVE, /* == TOMOYO_CONFIG_PERMISSIVE */
+       TOMOYO_STAT_POLICY_ENFORCING,  /* == TOMOYO_CONFIG_ENFORCING */
+       TOMOYO_MAX_POLICY_STAT
+};
+
+/* Index numbers for profile's PREFERENCE values. */
+enum tomoyo_pref_index {
+       TOMOYO_PREF_MAX_AUDIT_LOG,
+       TOMOYO_PREF_MAX_LEARNING_ENTRY,
+       TOMOYO_MAX_PREF
+};
+
+/********** Structure definitions. **********/
+
+/* Common header for holding ACL entries. */
 struct tomoyo_acl_head {
        struct list_head list;
        bool is_deleted;
 } __packed;
 
-/*
- * tomoyo_request_info is a structure which is used for holding
- *
- * (1) Domain information of current process.
- * (2) How many retries are made for this request.
- * (3) Profile number used for this request.
- * (4) Access control mode of the profile.
- */
+/* Common header for shared entries. */
+struct tomoyo_shared_acl_head {
+       struct list_head list;
+       atomic_t users;
+} __packed;
+
+struct tomoyo_policy_namespace;
+
+/* Structure for request info. */
 struct tomoyo_request_info {
+       /*
+        * For holding parameters specific to operations which deal files.
+        * NULL if not dealing with files.
+        */
+       struct tomoyo_obj_info *obj;
+       /*
+        * For holding parameters specific to execve() request.
+        * NULL if not dealing with do_execve().
+        */
+       struct tomoyo_execve *ee;
        struct tomoyo_domain_info *domain;
        /* For holding parameters. */
        union {
@@ -248,11 +370,13 @@ struct tomoyo_request_info {
                        const struct tomoyo_path_info *filename;
                        /* For using wildcards at tomoyo_find_next_domain(). */
                        const struct tomoyo_path_info *matched_path;
+                       /* One of values in "enum tomoyo_path_acl_index". */
                        u8 operation;
                } path;
                struct {
                        const struct tomoyo_path_info *filename1;
                        const struct tomoyo_path_info *filename2;
+                       /* One of values in "enum tomoyo_path2_acl_index". */
                        u8 operation;
                } path2;
                struct {
@@ -260,11 +384,16 @@ struct tomoyo_request_info {
                        unsigned int mode;
                        unsigned int major;
                        unsigned int minor;
+                       /* One of values in "enum tomoyo_mkdev_acl_index". */
                        u8 operation;
                } mkdev;
                struct {
                        const struct tomoyo_path_info *filename;
                        unsigned long number;
+                       /*
+                        * One of values in
+                        * "enum tomoyo_path_number_acl_index".
+                        */
                        u8 operation;
                } path_number;
                struct {
@@ -283,26 +412,7 @@ struct tomoyo_request_info {
        u8 type;
 };
 
-/*
- * tomoyo_path_info is a structure which is used for holding a string data
- * used by TOMOYO.
- * This structure has several fields for supporting pattern matching.
- *
- * (1) "name" is the '\0' terminated string data.
- * (2) "hash" is full_name_hash(name, strlen(name)).
- *     This allows tomoyo_pathcmp() to compare by hash before actually compare
- *     using strcmp().
- * (3) "const_len" is the length of the initial segment of "name" which
- *     consists entirely of non wildcard characters. In other words, the length
- *     which we can compare two strings using strncmp().
- * (4) "is_dir" is a bool which is true if "name" ends with "/",
- *     false otherwise.
- *     TOMOYO distinguishes directory and non-directory. A directory ends with
- *     "/" and non-directory does not end with "/".
- * (5) "is_patterned" is a bool which is true if "name" contains wildcard
- *     characters, false otherwise. This allows TOMOYO to use "hash" and
- *     strcmp() for string comparison if "is_patterned" is false.
- */
+/* Structure for holding a token. */
 struct tomoyo_path_info {
        const char *name;
        u32 hash;          /* = full_name_hash(name, strlen(name)) */
@@ -311,36 +421,32 @@ struct tomoyo_path_info {
        bool is_patterned; /* = tomoyo_path_contains_pattern(name) */
 };
 
-/*
- * tomoyo_name is a structure which is used for linking
- * "struct tomoyo_path_info" into tomoyo_name_list .
- */
+/* Structure for holding string data. */
 struct tomoyo_name {
-       struct list_head list;
-       atomic_t users;
+       struct tomoyo_shared_acl_head head;
        struct tomoyo_path_info entry;
 };
 
+/* Structure for holding a word. */
 struct tomoyo_name_union {
+       /* Either @filename or @group is NULL. */
        const struct tomoyo_path_info *filename;
        struct tomoyo_group *group;
-       u8 is_group;
 };
 
+/* Structure for holding a number. */
 struct tomoyo_number_union {
        unsigned long values[2];
-       struct tomoyo_group *group;
-       u8 min_type;
-       u8 max_type;
-       u8 is_group;
+       struct tomoyo_group *group; /* Maybe NULL. */
+       /* One of values in "enum tomoyo_value_type". */
+       u8 value_type[2];
 };
 
 /* Structure for "path_group"/"number_group" directive. */
 struct tomoyo_group {
-       struct list_head list;
+       struct tomoyo_shared_acl_head head;
        const struct tomoyo_path_info *group_name;
        struct list_head member_list;
-       atomic_t users;
 };
 
 /* Structure for "path_group" directive. */
@@ -355,130 +461,158 @@ struct tomoyo_number_group {
        struct tomoyo_number_union number;
 };
 
-/*
- * tomoyo_acl_info is a structure which is used for holding
- *
- *  (1) "list" which is linked to the ->acl_info_list of
- *      "struct tomoyo_domain_info"
- *  (2) "is_deleted" is a bool which is true if this domain is marked as
- *      "deleted", false otherwise.
- *  (3) "type" which tells type of the entry.
- *
- * Packing "struct tomoyo_acl_info" allows
- * "struct tomoyo_path_acl" to embed "u16" and "struct tomoyo_path2_acl"
- * "struct tomoyo_path_number_acl" "struct tomoyo_mkdev_acl" to embed
- * "u8" without enlarging their structure size.
- */
+/* Subset of "struct stat". Used by conditional ACL and audit logs. */
+struct tomoyo_mini_stat {
+       uid_t uid;
+       gid_t gid;
+       ino_t ino;
+       mode_t mode;
+       dev_t dev;
+       dev_t rdev;
+};
+
+/* Structure for dumping argv[] and envp[] of "struct linux_binprm". */
+struct tomoyo_page_dump {
+       struct page *page;    /* Previously dumped page. */
+       char *data;           /* Contents of "page". Size is PAGE_SIZE. */
+};
+
+/* Structure for attribute checks in addition to pathname checks. */
+struct tomoyo_obj_info {
+       /*
+        * True if tomoyo_get_attributes() was already called, false otherwise.
+        */
+       bool validate_done;
+       /* True if @stat[] is valid. */
+       bool stat_valid[TOMOYO_MAX_PATH_STAT];
+       /* First pathname. Initialized with { NULL, NULL } if no path. */
+       struct path path1;
+       /* Second pathname. Initialized with { NULL, NULL } if no path. */
+       struct path path2;
+       /*
+        * Information on @path1, @path1's parent directory, @path2, @path2's
+        * parent directory.
+        */
+       struct tomoyo_mini_stat stat[TOMOYO_MAX_PATH_STAT];
+       /*
+        * Content of symbolic link to be created. NULL for operations other
+        * than symlink().
+        */
+       struct tomoyo_path_info *symlink_target;
+};
+
+/* Structure for argv[]. */
+struct tomoyo_argv {
+       unsigned long index;
+       const struct tomoyo_path_info *value;
+       bool is_not;
+};
+
+/* Structure for envp[]. */
+struct tomoyo_envp {
+       const struct tomoyo_path_info *name;
+       const struct tomoyo_path_info *value;
+       bool is_not;
+};
+
+/* Structure for execve() operation. */
+struct tomoyo_execve {
+       struct tomoyo_request_info r;
+       struct tomoyo_obj_info obj;
+       struct linux_binprm *bprm;
+       /* For dumping argv[] and envp[]. */
+       struct tomoyo_page_dump dump;
+       /* For temporary use. */
+       char *tmp; /* Size is TOMOYO_EXEC_TMPSIZE bytes */
+};
+
+/* Structure for entries which follows "struct tomoyo_condition". */
+struct tomoyo_condition_element {
+       /*
+        * Left hand operand. A "struct tomoyo_argv" for TOMOYO_ARGV_ENTRY, a
+        * "struct tomoyo_envp" for TOMOYO_ENVP_ENTRY is attached to the tail
+        * of the array of this struct.
+        */
+       u8 left;
+       /*
+        * Right hand operand. A "struct tomoyo_number_union" for
+        * TOMOYO_NUMBER_UNION, a "struct tomoyo_name_union" for
+        * TOMOYO_NAME_UNION is attached to the tail of the array of this
+        * struct.
+        */
+       u8 right;
+       /* Equation operator. True if equals or overlaps, false otherwise. */
+       bool equals;
+};
+
+/* Structure for optional arguments. */
+struct tomoyo_condition {
+       struct tomoyo_shared_acl_head head;
+       u32 size; /* Memory size allocated for this entry. */
+       u16 condc; /* Number of conditions in this struct. */
+       u16 numbers_count; /* Number of "struct tomoyo_number_union values". */
+       u16 names_count; /* Number of "struct tomoyo_name_union names". */
+       u16 argc; /* Number of "struct tomoyo_argv". */
+       u16 envc; /* Number of "struct tomoyo_envp". */
+       /*
+        * struct tomoyo_condition_element condition[condc];
+        * struct tomoyo_number_union values[numbers_count];
+        * struct tomoyo_name_union names[names_count];
+        * struct tomoyo_argv argv[argc];
+        * struct tomoyo_envp envp[envc];
+        */
+};
+
+/* Common header for individual entries. */
 struct tomoyo_acl_info {
        struct list_head list;
+       struct tomoyo_condition *cond; /* Maybe NULL. */
        bool is_deleted;
-       u8 type; /* = one of values in "enum tomoyo_acl_entry_type_index". */
+       u8 type; /* One of values in "enum tomoyo_acl_entry_type_index". */
 } __packed;
 
-/*
- * tomoyo_domain_info is a structure which is used for holding permissions
- * (e.g. "allow_read /lib/libc-2.5.so") given to each domain.
- * It has following fields.
- *
- *  (1) "list" which is linked to tomoyo_domain_list .
- *  (2) "acl_info_list" which is linked to "struct tomoyo_acl_info".
- *  (3) "domainname" which holds the name of the domain.
- *  (4) "profile" which remembers profile number assigned to this domain.
- *  (5) "is_deleted" is a bool which is true if this domain is marked as
- *      "deleted", false otherwise.
- *  (6) "quota_warned" is a bool which is used for suppressing warning message
- *      when learning mode learned too much entries.
- *  (7) "ignore_global_allow_read" is a bool which is true if this domain
- *      should ignore "allow_read" directive in exception policy.
- *  (8) "transition_failed" is a bool which is set to true when this domain was
- *      unable to create a new domain at tomoyo_find_next_domain() because the
- *      name of the domain to be created was too long or it could not allocate
- *      memory. If set to true, more than one process continued execve()
- *      without domain transition.
- *  (9) "users" is an atomic_t that holds how many "struct cred"->security
- *      are referring this "struct tomoyo_domain_info". If is_deleted == true
- *      and users == 0, this struct will be kfree()d upon next garbage
- *      collection.
- *
- * A domain's lifecycle is an analogy of files on / directory.
- * Multiple domains with the same domainname cannot be created (as with
- * creating files with the same filename fails with -EEXIST).
- * If a process reached a domain, that process can reside in that domain after
- * that domain is marked as "deleted" (as with a process can access an already
- * open()ed file after that file was unlink()ed).
- */
+/* Structure for domain information. */
 struct tomoyo_domain_info {
        struct list_head list;
        struct list_head acl_info_list;
        /* Name of this domain. Never NULL.          */
        const struct tomoyo_path_info *domainname;
+       /* Namespace for this domain. Never NULL. */
+       struct tomoyo_policy_namespace *ns;
        u8 profile;        /* Profile number to use. */
+       u8 group;          /* Group number to use.   */
        bool is_deleted;   /* Delete flag.           */
-       bool quota_warned; /* Quota warnning flag.   */
-       bool ignore_global_allow_read; /* Ignore "allow_read" flag. */
-       bool transition_failed; /* Domain transition failed flag. */
+       bool flags[TOMOYO_MAX_DOMAIN_INFO_FLAGS];
        atomic_t users; /* Number of referring credentials. */
 };
 
 /*
- * tomoyo_path_acl is a structure which is used for holding an
- * entry with one pathname operation (e.g. open(), mkdir()).
- * It has following fields.
- *
- *  (1) "head" which is a "struct tomoyo_acl_info".
- *  (2) "perm" which is a bitmask of permitted operations.
- *  (3) "name" is the pathname.
- *
- * Directives held by this structure are "allow_read/write", "allow_execute",
- * "allow_read", "allow_write", "allow_unlink", "allow_rmdir",
- * "allow_truncate", "allow_symlink", "allow_rewrite", "allow_chroot" and
- * "allow_unmount".
+ * Structure for "file execute", "file read", "file write", "file append",
+ * "file unlink", "file getattr", "file rmdir", "file truncate",
+ * "file symlink", "file chroot" and "file unmount" directive.
  */
 struct tomoyo_path_acl {
        struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_PATH_ACL */
-       u16 perm;
+       u16 perm; /* Bitmask of values in "enum tomoyo_path_acl_index". */
        struct tomoyo_name_union name;
 };
 
 /*
- * tomoyo_path_number_acl is a structure which is used for holding an
- * entry with one pathname and one number operation.
- * It has following fields.
- *
- *  (1) "head" which is a "struct tomoyo_acl_info".
- *  (2) "perm" which is a bitmask of permitted operations.
- *  (3) "name" is the pathname.
- *  (4) "number" is the numeric value.
- *
- * Directives held by this structure are "allow_create", "allow_mkdir",
- * "allow_ioctl", "allow_mkfifo", "allow_mksock", "allow_chmod", "allow_chown"
- * and "allow_chgrp".
- *
+ * Structure for "file create", "file mkdir", "file mkfifo", "file mksock",
+ * "file ioctl", "file chmod", "file chown" and "file chgrp" directive.
  */
 struct tomoyo_path_number_acl {
        struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_PATH_NUMBER_ACL */
+       /* Bitmask of values in "enum tomoyo_path_number_acl_index". */
        u8 perm;
        struct tomoyo_name_union name;
        struct tomoyo_number_union number;
 };
 
-/*
- * tomoyo_mkdev_acl is a structure which is used for holding an
- * entry with one pathname and three numbers operation.
- * It has following fields.
- *
- *  (1) "head" which is a "struct tomoyo_acl_info".
- *  (2) "perm" which is a bitmask of permitted operations.
- *  (3) "mode" is the create mode.
- *  (4) "major" is the major number of device node.
- *  (5) "minor" is the minor number of device node.
- *
- * Directives held by this structure are "allow_mkchar", "allow_mkblock".
- *
- */
+/* Structure for "file mkblock" and "file mkchar" directive. */
 struct tomoyo_mkdev_acl {
        struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_MKDEV_ACL */
-       u8 perm;
+       u8 perm; /* Bitmask of values in "enum tomoyo_mkdev_acl_index". */
        struct tomoyo_name_union name;
        struct tomoyo_number_union mode;
        struct tomoyo_number_union major;
@@ -486,38 +620,16 @@ struct tomoyo_mkdev_acl {
 };
 
 /*
- * tomoyo_path2_acl is a structure which is used for holding an
- * entry with two pathnames operation (i.e. link(), rename() and pivot_root()).
- * It has following fields.
- *
- *  (1) "head" which is a "struct tomoyo_acl_info".
- *  (2) "perm" which is a bitmask of permitted operations.
- *  (3) "name1" is the source/old pathname.
- *  (4) "name2" is the destination/new pathname.
- *
- * Directives held by this structure are "allow_rename", "allow_link" and
- * "allow_pivot_root".
+ * Structure for "file rename", "file link" and "file pivot_root" directive.
  */
 struct tomoyo_path2_acl {
        struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_PATH2_ACL */
-       u8 perm;
+       u8 perm; /* Bitmask of values in "enum tomoyo_path2_acl_index". */
        struct tomoyo_name_union name1;
        struct tomoyo_name_union name2;
 };
 
-/*
- * tomoyo_mount_acl is a structure which is used for holding an
- * entry for mount operation.
- * It has following fields.
- *
- *  (1) "head" which is a "struct tomoyo_acl_info".
- *  (2) "dev_name" is the device name.
- *  (3) "dir_name" is the mount point.
- *  (4) "fs_type" is the filesystem type.
- *  (5) "flags" is the mount flags.
- *
- * Directive held by this structure is "allow_mount".
- */
+/* Structure for "file mount" directive. */
 struct tomoyo_mount_acl {
        struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_MOUNT_ACL */
        struct tomoyo_name_union dev_name;
@@ -526,7 +638,15 @@ struct tomoyo_mount_acl {
        struct tomoyo_number_union flags;
 };
 
-#define TOMOYO_MAX_IO_READ_QUEUE 32
+/* Structure for holding a line from /sys/kernel/security/tomoyo/ interface. */
+struct tomoyo_acl_param {
+       char *data;
+       struct list_head *list;
+       struct tomoyo_policy_namespace *ns;
+       bool is_delete;
+};
+
+#define TOMOYO_MAX_IO_READ_QUEUE 64
 
 /*
  * Structure for reading/writing policy via /sys/kernel/security/tomoyo
@@ -538,95 +658,55 @@ struct tomoyo_io_buffer {
        int (*poll) (struct file *file, poll_table *wait);
        /* Exclusive lock for this structure.   */
        struct mutex io_sem;
-       /* Index returned by tomoyo_read_lock(). */
-       int reader_idx;
        char __user *read_user_buf;
-       int read_user_buf_avail;
+       size_t read_user_buf_avail;
        struct {
+               struct list_head *ns;
                struct list_head *domain;
                struct list_head *group;
                struct list_head *acl;
-               int avail;
-               int step;
-               int query_index;
+               size_t avail;
+               unsigned int step;
+               unsigned int query_index;
                u16 index;
+               u16 cond_index;
+               u8 acl_group_index;
+               u8 cond_step;
                u8 bit;
                u8 w_pos;
                bool eof;
                bool print_this_domain_only;
-               bool print_execute_only;
+               bool print_transition_related_only;
+               bool print_cond_part;
                const char *w[TOMOYO_MAX_IO_READ_QUEUE];
        } r;
-       /* The position currently writing to.   */
-       struct tomoyo_domain_info *write_var1;
+       struct {
+               struct tomoyo_policy_namespace *ns;
+               /* The position currently writing to.   */
+               struct tomoyo_domain_info *domain;
+               /* Bytes available for writing.         */
+               size_t avail;
+               bool is_delete;
+       } w;
        /* Buffer for reading.                  */
        char *read_buf;
        /* Size of read buffer.                 */
-       int readbuf_size;
+       size_t readbuf_size;
        /* Buffer for writing.                  */
        char *write_buf;
-       /* Bytes available for writing.         */
-       int write_avail;
        /* Size of write buffer.                */
-       int writebuf_size;
+       size_t writebuf_size;
        /* Type of this interface.              */
-       u8 type;
-};
-
-/*
- * tomoyo_readable_file is a structure which is used for holding
- * "allow_read" entries.
- * It has following fields.
- *
- *  (1) "head" is "struct tomoyo_acl_head".
- *  (2) "filename" is a pathname which is allowed to open(O_RDONLY).
- */
-struct tomoyo_readable_file {
-       struct tomoyo_acl_head head;
-       const struct tomoyo_path_info *filename;
-};
-
-/*
- * tomoyo_no_pattern is a structure which is used for holding
- * "file_pattern" entries.
- * It has following fields.
- *
- *  (1) "head" is "struct tomoyo_acl_head".
- *  (2) "pattern" is a pathname pattern which is used for converting pathnames
- *      to pathname patterns during learning mode.
- */
-struct tomoyo_no_pattern {
-       struct tomoyo_acl_head head;
-       const struct tomoyo_path_info *pattern;
-};
-
-/*
- * tomoyo_no_rewrite is a structure which is used for holding
- * "deny_rewrite" entries.
- * It has following fields.
- *
- *  (1) "head" is "struct tomoyo_acl_head".
- *  (2) "pattern" is a pathname which is by default not permitted to modify
- *      already existing content.
- */
-struct tomoyo_no_rewrite {
-       struct tomoyo_acl_head head;
-       const struct tomoyo_path_info *pattern;
+       enum tomoyo_securityfs_interface_index type;
+       /* Users counter protected by tomoyo_io_buffer_list_lock. */
+       u8 users;
+       /* List for telling GC not to kfree() elements. */
+       struct list_head list;
 };
 
 /*
- * tomoyo_transition_control is a structure which is used for holding
- * "initialize_domain"/"no_initialize_domain"/"keep_domain"/"no_keep_domain"
- * entries.
- * It has following fields.
- *
- *  (1) "head" is "struct tomoyo_acl_head".
- *  (2) "type" is type of this entry.
- *  (3) "is_last_name" is a bool which is true if "domainname" is "the last
- *      component of a domainname", false otherwise.
- *  (4) "domainname" which is "a domainname" or "the last component of a
- *      domainname".
- *  (5) "program" which is a program's pathname.
+ * Structure for "initialize_domain"/"no_initialize_domain"/"keep_domain"/
+ * "no_keep_domain" keyword.
  */
 struct tomoyo_transition_control {
        struct tomoyo_acl_head head;
@@ -637,32 +717,14 @@ struct tomoyo_transition_control {
        const struct tomoyo_path_info *program;    /* Maybe NULL */
 };
 
-/*
- * tomoyo_aggregator is a structure which is used for holding
- * "aggregator" entries.
- * It has following fields.
- *
- *  (1) "head" is "struct tomoyo_acl_head".
- *  (2) "original_name" which is originally requested name.
- *  (3) "aggregated_name" which is name to rewrite.
- */
+/* Structure for "aggregator" keyword. */
 struct tomoyo_aggregator {
        struct tomoyo_acl_head head;
        const struct tomoyo_path_info *original_name;
        const struct tomoyo_path_info *aggregated_name;
 };
 
-/*
- * tomoyo_manager is a structure which is used for holding list of
- * domainnames or programs which are permitted to modify configuration via
- * /sys/kernel/security/tomoyo/ interface.
- * It has following fields.
- *
- *  (1) "head" is "struct tomoyo_acl_head".
- *  (2) "is_domain" is a bool which is true if "manager" is a domainname, false
- *      otherwise.
- *  (3) "manager" is a domainname or a program's pathname.
- */
+/* Structure for policy manager. */
 struct tomoyo_manager {
        struct tomoyo_acl_head head;
        bool is_domain;  /* True if manager is a domainname. */
@@ -677,6 +739,7 @@ struct tomoyo_preference {
        bool permissive_verbose;
 };
 
+/* Structure for /sys/kernel/security/tomoyo/profile interface. */
 struct tomoyo_profile {
        const struct tomoyo_path_info *comment;
        struct tomoyo_preference *learning;
@@ -685,323 +748,409 @@ struct tomoyo_profile {
        struct tomoyo_preference preference;
        u8 default_config;
        u8 config[TOMOYO_MAX_MAC_INDEX + TOMOYO_MAX_MAC_CATEGORY_INDEX];
+       unsigned int pref[TOMOYO_MAX_PREF];
+};
+
+/* Structure for representing YYYY/MM/DD hh/mm/ss. */
+struct tomoyo_time {
+       u16 year;
+       u8 month;
+       u8 day;
+       u8 hour;
+       u8 min;
+       u8 sec;
+};
+
+/* Structure for policy namespace. */
+struct tomoyo_policy_namespace {
+       /* Profile table. Memory is allocated as needed. */
+       struct tomoyo_profile *profile_ptr[TOMOYO_MAX_PROFILES];
+       /* List of "struct tomoyo_group". */
+       struct list_head group_list[TOMOYO_MAX_GROUP];
+       /* List of policy. */
+       struct list_head policy_list[TOMOYO_MAX_POLICY];
+       /* The global ACL referred by "use_group" keyword. */
+       struct list_head acl_group[TOMOYO_MAX_ACL_GROUPS];
+       /* List for connecting to tomoyo_namespace_list list. */
+       struct list_head namespace_list;
+       /* Profile version. Currently only 20100903 is defined. */
+       unsigned int profile_version;
+       /* Name of this namespace (e.g. "<kernel>", "</usr/sbin/httpd>" ). */
+       const char *name;
 };
 
 /********** Function prototypes. **********/
 
-/* Check whether the given string starts with the given keyword. */
-bool tomoyo_str_starts(char **src, const char *find);
-/* Get tomoyo_realpath() of current process. */
-const char *tomoyo_get_exe(void);
-/* Format string. */
-void tomoyo_normalize_line(unsigned char *buffer);
-/* Print warning or error message on console. */
-void tomoyo_warn_log(struct tomoyo_request_info *r, const char *fmt, ...)
-     __attribute__ ((format(printf, 2, 3)));
-/* Check all profiles currently assigned to domains are defined. */
-void tomoyo_check_profile(void);
-/* Open operation for /sys/kernel/security/tomoyo/ interface. */
-int tomoyo_open_control(const u8 type, struct file *file);
-/* Close /sys/kernel/security/tomoyo/ interface. */
-int tomoyo_close_control(struct file *file);
-/* Poll operation for /sys/kernel/security/tomoyo/ interface. */
-int tomoyo_poll_control(struct file *file, poll_table *wait);
-/* Read operation for /sys/kernel/security/tomoyo/ interface. */
-int tomoyo_read_control(struct file *file, char __user *buffer,
-                       const int buffer_len);
-/* Write operation for /sys/kernel/security/tomoyo/ interface. */
-int tomoyo_write_control(struct file *file, const char __user *buffer,
-                        const int buffer_len);
-/* Check whether the domain has too many ACL entries to hold. */
-bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r);
-/* Print out of memory warning message. */
-void tomoyo_warn_oom(const char *function);
-/* Check whether the given name matches the given name_union. */
-const struct tomoyo_path_info *
-tomoyo_compare_name_union(const struct tomoyo_path_info *name,
-                         const struct tomoyo_name_union *ptr);
-/* Check whether the given number matches the given number_union. */
 bool tomoyo_compare_number_union(const unsigned long value,
                                 const struct tomoyo_number_union *ptr);
-int tomoyo_get_mode(const u8 profile, const u8 index);
-void tomoyo_io_printf(struct tomoyo_io_buffer *head, const char *fmt, ...)
-       __attribute__ ((format(printf, 2, 3)));
-/* Check whether the domainname is correct. */
+bool tomoyo_condition(struct tomoyo_request_info *r,
+                     const struct tomoyo_condition *cond);
 bool tomoyo_correct_domain(const unsigned char *domainname);
-/* Check whether the token is correct. */
 bool tomoyo_correct_path(const char *filename);
 bool tomoyo_correct_word(const char *string);
-/* Check whether the token can be a domainname. */
 bool tomoyo_domain_def(const unsigned char *buffer);
-bool tomoyo_parse_name_union(const char *filename,
-                            struct tomoyo_name_union *ptr);
-/* Check whether the given filename matches the given path_group. */
-const struct tomoyo_path_info *
-tomoyo_path_matches_group(const struct tomoyo_path_info *pathname,
-                         const struct tomoyo_group *group);
-/* Check whether the given value matches the given number_group. */
+bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r);
+bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos,
+                     struct tomoyo_page_dump *dump);
+bool tomoyo_memory_ok(void *ptr);
 bool tomoyo_number_matches_group(const unsigned long min,
                                 const unsigned long max,
                                 const struct tomoyo_group *group);
-/* Check whether the given filename matches the given pattern. */
+bool tomoyo_parse_name_union(struct tomoyo_acl_param *param,
+                            struct tomoyo_name_union *ptr);
+bool tomoyo_parse_number_union(struct tomoyo_acl_param *param,
+                              struct tomoyo_number_union *ptr);
 bool tomoyo_path_matches_pattern(const struct tomoyo_path_info *filename,
                                 const struct tomoyo_path_info *pattern);
-
-bool tomoyo_parse_number_union(char *data, struct tomoyo_number_union *num);
-/* Tokenize a line. */
-bool tomoyo_tokenize(char *buffer, char *w[], size_t size);
-/* Write domain policy violation warning message to console? */
-bool tomoyo_verbose_mode(const struct tomoyo_domain_info *domain);
-/* Fill "struct tomoyo_request_info". */
-int tomoyo_init_request_info(struct tomoyo_request_info *r,
-                            struct tomoyo_domain_info *domain,
-                            const u8 index);
-/* Check permission for mount operation. */
-int tomoyo_mount_permission(char *dev_name, struct path *path, char *type,
-                           unsigned long flags, void *data_page);
-/* Create "aggregator" entry in exception policy. */
-int tomoyo_write_aggregator(char *data, const bool is_delete);
-int tomoyo_write_transition_control(char *data, const bool is_delete,
-                                   const u8 type);
-/*
- * Create "allow_read/write", "allow_execute", "allow_read", "allow_write",
- * "allow_create", "allow_unlink", "allow_mkdir", "allow_rmdir",
- * "allow_mkfifo", "allow_mksock", "allow_mkblock", "allow_mkchar",
- * "allow_truncate", "allow_symlink", "allow_rewrite", "allow_rename" and
- * "allow_link" entry in domain policy.
- */
-int tomoyo_write_file(char *data, struct tomoyo_domain_info *domain,
-                     const bool is_delete);
-/* Create "allow_read" entry in exception policy. */
-int tomoyo_write_globally_readable(char *data, const bool is_delete);
-/* Create "allow_mount" entry in domain policy. */
-int tomoyo_write_mount(char *data, struct tomoyo_domain_info *domain,
-                      const bool is_delete);
-/* Create "deny_rewrite" entry in exception policy. */
-int tomoyo_write_no_rewrite(char *data, const bool is_delete);
-/* Create "file_pattern" entry in exception policy. */
-int tomoyo_write_pattern(char *data, const bool is_delete);
-/* Create "path_group"/"number_group" entry in exception policy. */
-int tomoyo_write_group(char *data, const bool is_delete, const u8 type);
-int tomoyo_supervisor(struct tomoyo_request_info *r, const char *fmt, ...)
-     __attribute__ ((format(printf, 2, 3)));
-/* Find a domain by the given name. */
-struct tomoyo_domain_info *tomoyo_find_domain(const char *domainname);
-/* Find or create a domain by the given name. */
-struct tomoyo_domain_info *tomoyo_assign_domain(const char *domainname,
-                                               const u8 profile);
-struct tomoyo_profile *tomoyo_profile(const u8 profile);
-/*
- * Allocate memory for "struct tomoyo_path_group"/"struct tomoyo_number_group".
- */
-struct tomoyo_group *tomoyo_get_group(const char *group_name, const u8 type);
-
-/* Check mode for specified functionality. */
-unsigned int tomoyo_check_flags(const struct tomoyo_domain_info *domain,
-                               const u8 index);
-/* Fill in "struct tomoyo_path_info" members. */
-void tomoyo_fill_path_info(struct tomoyo_path_info *ptr);
-/* Run policy loader when /sbin/init starts. */
-void tomoyo_load_policy(const char *filename);
-
-void tomoyo_put_number_union(struct tomoyo_number_union *ptr);
-
-/* Convert binary string to ascii string. */
+bool tomoyo_permstr(const char *string, const char *keyword);
+bool tomoyo_str_starts(char **src, const char *find);
 char *tomoyo_encode(const char *str);
-
-/*
- * Returns realpath(3) of the given pathname except that
- * ignores chroot'ed root and does not follow the final symlink.
- */
-char *tomoyo_realpath_nofollow(const char *pathname);
-/*
- * Returns realpath(3) of the given pathname except that
- * ignores chroot'ed root and the pathname is already solved.
- */
+char *tomoyo_init_log(struct tomoyo_request_info *r, int len, const char *fmt,
+                     va_list args);
+char *tomoyo_read_token(struct tomoyo_acl_param *param);
 char *tomoyo_realpath_from_path(struct path *path);
-/* Get patterned pathname. */
-const char *tomoyo_pattern(const struct tomoyo_path_info *filename);
-
-/* Check memory quota. */
-bool tomoyo_memory_ok(void *ptr);
-void *tomoyo_commit_ok(void *data, const unsigned int size);
-
-/*
- * Keep the given name on the RAM.
- * The RAM is shared, so NEVER try to modify or kfree() the returned name.
- */
+char *tomoyo_realpath_nofollow(const char *pathname);
+const char *tomoyo_get_exe(void);
+const char *tomoyo_yesno(const unsigned int value);
+const struct tomoyo_path_info *tomoyo_compare_name_union
+(const struct tomoyo_path_info *name, const struct tomoyo_name_union *ptr);
 const struct tomoyo_path_info *tomoyo_get_name(const char *name);
-
-/* Check for memory usage. */
-void tomoyo_read_memory_counter(struct tomoyo_io_buffer *head);
-
-/* Set memory quota. */
-int tomoyo_write_memory_quota(struct tomoyo_io_buffer *head);
-
-/* Initialize mm related code. */
-void __init tomoyo_mm_init(void);
-int tomoyo_path_permission(struct tomoyo_request_info *r, u8 operation,
-                          const struct tomoyo_path_info *filename);
+const struct tomoyo_path_info *tomoyo_path_matches_group
+(const struct tomoyo_path_info *pathname, const struct tomoyo_group *group);
 int tomoyo_check_open_permission(struct tomoyo_domain_info *domain,
                                 struct path *path, const int flag);
-int tomoyo_path_number_perm(const u8 operation, struct path *path,
-                           unsigned long number);
+int tomoyo_close_control(struct tomoyo_io_buffer *head);
+int tomoyo_find_next_domain(struct linux_binprm *bprm);
+int tomoyo_get_mode(const struct tomoyo_policy_namespace *ns, const u8 profile,
+                   const u8 index);
+int tomoyo_init_request_info(struct tomoyo_request_info *r,
+                            struct tomoyo_domain_info *domain,
+                            const u8 index);
 int tomoyo_mkdev_perm(const u8 operation, struct path *path,
                      const unsigned int mode, unsigned int dev);
-int tomoyo_path_perm(const u8 operation, struct path *path);
+int tomoyo_mount_permission(char *dev_name, struct path *path,
+                           const char *type, unsigned long flags,
+                           void *data_page);
+int tomoyo_open_control(const u8 type, struct file *file);
 int tomoyo_path2_perm(const u8 operation, struct path *path1,
                      struct path *path2);
-int tomoyo_find_next_domain(struct linux_binprm *bprm);
-
-void tomoyo_print_ulong(char *buffer, const int buffer_len,
-                       const unsigned long value, const u8 type);
-
-/* Drop refcount on tomoyo_name_union. */
-void tomoyo_put_name_union(struct tomoyo_name_union *ptr);
-
-/* Run garbage collector. */
-void tomoyo_run_gc(void);
-
-void tomoyo_memory_free(void *ptr);
-
+int tomoyo_path_number_perm(const u8 operation, struct path *path,
+                           unsigned long number);
+int tomoyo_path_perm(const u8 operation, struct path *path,
+                    const char *target);
+int tomoyo_path_permission(struct tomoyo_request_info *r, u8 operation,
+                          const struct tomoyo_path_info *filename);
+int tomoyo_poll_control(struct file *file, poll_table *wait);
+int tomoyo_poll_log(struct file *file, poll_table *wait);
+int tomoyo_supervisor(struct tomoyo_request_info *r, const char *fmt, ...)
+       __printf(2, 3);
 int tomoyo_update_domain(struct tomoyo_acl_info *new_entry, const int size,
-                        bool is_delete, struct tomoyo_domain_info *domain,
-                        bool (*check_duplicate) (const struct tomoyo_acl_info
-                                                 *,
-                                                 const struct tomoyo_acl_info
-                                                 *),
-                        bool (*merge_duplicate) (struct tomoyo_acl_info *,
-                                                 struct tomoyo_acl_info *,
-                                                 const bool));
+                        struct tomoyo_acl_param *param,
+                        bool (*check_duplicate)
+                        (const struct tomoyo_acl_info *,
+                         const struct tomoyo_acl_info *),
+                        bool (*merge_duplicate)
+                        (struct tomoyo_acl_info *, struct tomoyo_acl_info *,
+                         const bool));
 int tomoyo_update_policy(struct tomoyo_acl_head *new_entry, const int size,
-                        bool is_delete, struct list_head *list,
-                        bool (*check_duplicate) (const struct tomoyo_acl_head
-                                                 *,
-                                                 const struct tomoyo_acl_head
-                                                 *));
+                        struct tomoyo_acl_param *param,
+                        bool (*check_duplicate)
+                        (const struct tomoyo_acl_head *,
+                         const struct tomoyo_acl_head *));
+int tomoyo_write_aggregator(struct tomoyo_acl_param *param);
+int tomoyo_write_file(struct tomoyo_acl_param *param);
+int tomoyo_write_group(struct tomoyo_acl_param *param, const u8 type);
+int tomoyo_write_transition_control(struct tomoyo_acl_param *param,
+                                   const u8 type);
+ssize_t tomoyo_read_control(struct tomoyo_io_buffer *head, char __user *buffer,
+                           const int buffer_len);
+ssize_t tomoyo_write_control(struct tomoyo_io_buffer *head,
+                            const char __user *buffer, const int buffer_len);
+struct tomoyo_condition *tomoyo_get_condition(struct tomoyo_acl_param *param);
+struct tomoyo_domain_info *tomoyo_assign_domain(const char *domainname,
+                                               const bool transit);
+struct tomoyo_domain_info *tomoyo_find_domain(const char *domainname);
+struct tomoyo_group *tomoyo_get_group(struct tomoyo_acl_param *param,
+                                     const u8 idx);
+struct tomoyo_policy_namespace *tomoyo_assign_namespace
+(const char *domainname);
+struct tomoyo_profile *tomoyo_profile(const struct tomoyo_policy_namespace *ns,
+                                     const u8 profile);
+unsigned int tomoyo_check_flags(const struct tomoyo_domain_info *domain,
+                               const u8 index);
+u8 tomoyo_parse_ulong(unsigned long *result, char **str);
+void *tomoyo_commit_ok(void *data, const unsigned int size);
+void __init tomoyo_load_builtin_policy(void);
+void __init tomoyo_mm_init(void);
 void tomoyo_check_acl(struct tomoyo_request_info *r,
                      bool (*check_entry) (struct tomoyo_request_info *,
                                           const struct tomoyo_acl_info *));
+void tomoyo_check_profile(void);
+void tomoyo_convert_time(time_t time, struct tomoyo_time *stamp);
+void tomoyo_del_condition(struct list_head *element);
+void tomoyo_fill_path_info(struct tomoyo_path_info *ptr);
+void tomoyo_get_attributes(struct tomoyo_obj_info *obj);
+void tomoyo_init_policy_namespace(struct tomoyo_policy_namespace *ns);
+void tomoyo_io_printf(struct tomoyo_io_buffer *head, const char *fmt, ...)
+        __printf(2, 3);
+void tomoyo_load_policy(const char *filename);
+void tomoyo_memory_free(void *ptr);
+void tomoyo_normalize_line(unsigned char *buffer);
+void tomoyo_notify_gc(struct tomoyo_io_buffer *head, const bool is_register);
+void tomoyo_print_ulong(char *buffer, const int buffer_len,
+                       const unsigned long value, const u8 type);
+void tomoyo_put_name_union(struct tomoyo_name_union *ptr);
+void tomoyo_put_number_union(struct tomoyo_number_union *ptr);
+void tomoyo_read_log(struct tomoyo_io_buffer *head);
+void tomoyo_update_stat(const u8 index);
+void tomoyo_warn_oom(const char *function);
+void tomoyo_write_log(struct tomoyo_request_info *r, const char *fmt, ...)
+       __printf(2, 3);
+void tomoyo_write_log2(struct tomoyo_request_info *r, int len, const char *fmt,
+                      va_list args);
 
 /********** External variable definitions. **********/
 
-/* Lock for GC. */
-extern struct srcu_struct tomoyo_ss;
-
-/* The list for "struct tomoyo_domain_info". */
+extern bool tomoyo_policy_loaded;
+extern const char * const tomoyo_condition_keyword
+[TOMOYO_MAX_CONDITION_KEYWORD];
+extern const char * const tomoyo_dif[TOMOYO_MAX_DOMAIN_INFO_FLAGS];
+extern const char * const tomoyo_mac_keywords[TOMOYO_MAX_MAC_INDEX
+                                             + TOMOYO_MAX_MAC_CATEGORY_INDEX];
+extern const char * const tomoyo_mode[TOMOYO_CONFIG_MAX_MODE];
+extern const char * const tomoyo_path_keyword[TOMOYO_MAX_PATH_OPERATION];
+extern const u8 tomoyo_index2category[TOMOYO_MAX_MAC_INDEX];
+extern const u8 tomoyo_pn2mac[TOMOYO_MAX_PATH_NUMBER_OPERATION];
+extern const u8 tomoyo_pnnn2mac[TOMOYO_MAX_MKDEV_OPERATION];
+extern const u8 tomoyo_pp2mac[TOMOYO_MAX_PATH2_OPERATION];
+extern struct list_head tomoyo_condition_list;
 extern struct list_head tomoyo_domain_list;
-
-extern struct list_head tomoyo_policy_list[TOMOYO_MAX_POLICY];
-extern struct list_head tomoyo_group_list[TOMOYO_MAX_GROUP];
 extern struct list_head tomoyo_name_list[TOMOYO_MAX_HASH];
-
-/* Lock for protecting policy. */
+extern struct list_head tomoyo_namespace_list;
 extern struct mutex tomoyo_policy_lock;
-
-/* Has /sbin/init started? */
-extern bool tomoyo_policy_loaded;
-
-/* The kernel's domain. */
+extern struct srcu_struct tomoyo_ss;
 extern struct tomoyo_domain_info tomoyo_kernel_domain;
-
-extern const char *tomoyo_path_keyword[TOMOYO_MAX_PATH_OPERATION];
-extern const char *tomoyo_mkdev_keyword[TOMOYO_MAX_MKDEV_OPERATION];
-extern const char *tomoyo_path2_keyword[TOMOYO_MAX_PATH2_OPERATION];
-extern const char *tomoyo_path_number_keyword[TOMOYO_MAX_PATH_NUMBER_OPERATION];
-
-extern unsigned int tomoyo_quota_for_query;
-extern unsigned int tomoyo_query_memory_size;
+extern struct tomoyo_policy_namespace tomoyo_kernel_namespace;
+extern unsigned int tomoyo_memory_quota[TOMOYO_MAX_MEMORY_STAT];
+extern unsigned int tomoyo_memory_used[TOMOYO_MAX_MEMORY_STAT];
 
 /********** Inlined functions. **********/
 
+/**
+ * tomoyo_read_lock - Take lock for protecting policy.
+ *
+ * Returns index number for tomoyo_read_unlock().
+ */
 static inline int tomoyo_read_lock(void)
 {
        return srcu_read_lock(&tomoyo_ss);
 }
 
+/**
+ * tomoyo_read_unlock - Release lock for protecting policy.
+ *
+ * @idx: Index number returned by tomoyo_read_lock().
+ *
+ * Returns nothing.
+ */
 static inline void tomoyo_read_unlock(int idx)
 {
        srcu_read_unlock(&tomoyo_ss, idx);
 }
 
-/* strcmp() for "struct tomoyo_path_info" structure. */
-static inline bool tomoyo_pathcmp(const struct tomoyo_path_info *a,
-                                 const struct tomoyo_path_info *b)
+/**
+ * tomoyo_sys_getppid - Copy of getppid().
+ *
+ * Returns parent process's PID.
+ *
+ * Alpha does not have getppid() defined. To be able to build this module on
+ * Alpha, I have to copy getppid() from kernel/timer.c.
+ */
+static inline pid_t tomoyo_sys_getppid(void)
 {
-       return a->hash != b->hash || strcmp(a->name, b->name);
+       pid_t pid;
+       rcu_read_lock();
+       pid = task_tgid_vnr(current->real_parent);
+       rcu_read_unlock();
+       return pid;
 }
 
 /**
- * tomoyo_valid - Check whether the character is a valid char.
+ * tomoyo_sys_getpid - Copy of getpid().
  *
- * @c: The character to check.
+ * Returns current thread's PID.
  *
- * Returns true if @c is a valid character, false otherwise.
+ * Alpha does not have getpid() defined. To be able to build this module on
+ * Alpha, I have to copy getpid() from kernel/timer.c.
  */
-static inline bool tomoyo_valid(const unsigned char c)
+static inline pid_t tomoyo_sys_getpid(void)
 {
-       return c > ' ' && c < 127;
+       return task_tgid_vnr(current);
 }
 
 /**
- * tomoyo_invalid - Check whether the character is an invalid char.
+ * tomoyo_pathcmp - strcmp() for "struct tomoyo_path_info" structure.
  *
- * @c: The character to check.
+ * @a: Pointer to "struct tomoyo_path_info".
+ * @b: Pointer to "struct tomoyo_path_info".
  *
- * Returns true if @c is an invalid character, false otherwise.
+ * Returns true if @a == @b, false otherwise.
  */
-static inline bool tomoyo_invalid(const unsigned char c)
+static inline bool tomoyo_pathcmp(const struct tomoyo_path_info *a,
+                                 const struct tomoyo_path_info *b)
 {
-       return c && (c <= ' ' || c >= 127);
+       return a->hash != b->hash || strcmp(a->name, b->name);
 }
 
+/**
+ * tomoyo_put_name - Drop reference on "struct tomoyo_name".
+ *
+ * @name: Pointer to "struct tomoyo_path_info". Maybe NULL.
+ *
+ * Returns nothing.
+ */
 static inline void tomoyo_put_name(const struct tomoyo_path_info *name)
 {
        if (name) {
                struct tomoyo_name *ptr =
                        container_of(name, typeof(*ptr), entry);
-               atomic_dec(&ptr->users);
+               atomic_dec(&ptr->head.users);
        }
 }
 
+/**
+ * tomoyo_put_condition - Drop reference on "struct tomoyo_condition".
+ *
+ * @cond: Pointer to "struct tomoyo_condition". Maybe NULL.
+ *
+ * Returns nothing.
+ */
+static inline void tomoyo_put_condition(struct tomoyo_condition *cond)
+{
+       if (cond)
+               atomic_dec(&cond->head.users);
+}
+
+/**
+ * tomoyo_put_group - Drop reference on "struct tomoyo_group".
+ *
+ * @group: Pointer to "struct tomoyo_group". Maybe NULL.
+ *
+ * Returns nothing.
+ */
 static inline void tomoyo_put_group(struct tomoyo_group *group)
 {
        if (group)
-               atomic_dec(&group->users);
+               atomic_dec(&group->head.users);
 }
 
+/**
+ * tomoyo_domain - Get "struct tomoyo_domain_info" for current thread.
+ *
+ * Returns pointer to "struct tomoyo_domain_info" for current thread.
+ */
 static inline struct tomoyo_domain_info *tomoyo_domain(void)
 {
        return current_cred()->security;
 }
 
+/**
+ * tomoyo_real_domain - Get "struct tomoyo_domain_info" for specified thread.
+ *
+ * @task: Pointer to "struct task_struct".
+ *
 * Returns pointer to "struct tomoyo_domain_info" for specified thread.
+ */
 static inline struct tomoyo_domain_info *tomoyo_real_domain(struct task_struct
                                                            *task)
 {
        return task_cred_xxx(task, security);
 }
 
-static inline bool tomoyo_same_acl_head(const struct tomoyo_acl_info *p1,
-                                          const struct tomoyo_acl_info *p2)
+/**
+ * tomoyo_same_name_union - Check for duplicated "struct tomoyo_name_union" entry.
+ *
+ * @a: Pointer to "struct tomoyo_name_union".
+ * @b: Pointer to "struct tomoyo_name_union".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
+static inline bool tomoyo_same_name_union
+(const struct tomoyo_name_union *a, const struct tomoyo_name_union *b)
 {
-       return p1->type == p2->type;
+       return a->filename == b->filename && a->group == b->group;
 }
 
-static inline bool tomoyo_same_name_union
-(const struct tomoyo_name_union *p1, const struct tomoyo_name_union *p2)
+/**
+ * tomoyo_same_number_union - Check for duplicated "struct tomoyo_number_union" entry.
+ *
+ * @a: Pointer to "struct tomoyo_number_union".
+ * @b: Pointer to "struct tomoyo_number_union".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
+static inline bool tomoyo_same_number_union
+(const struct tomoyo_number_union *a, const struct tomoyo_number_union *b)
 {
-       return p1->filename == p2->filename && p1->group == p2->group &&
-               p1->is_group == p2->is_group;
+       return a->values[0] == b->values[0] && a->values[1] == b->values[1] &&
+               a->group == b->group && a->value_type[0] == b->value_type[0] &&
+               a->value_type[1] == b->value_type[1];
 }
 
-static inline bool tomoyo_same_number_union
-(const struct tomoyo_number_union *p1, const struct tomoyo_number_union *p2)
+/**
+ * tomoyo_current_namespace - Get "struct tomoyo_policy_namespace" for current thread.
+ *
+ * Returns pointer to "struct tomoyo_policy_namespace" for current thread.
+ */
+static inline struct tomoyo_policy_namespace *tomoyo_current_namespace(void)
+{
+       return tomoyo_domain()->ns;
+}
+
+#if defined(CONFIG_SLOB)
+
+/**
+ * tomoyo_round2 - Round up to power of 2 for calculating memory usage.
+ *
+ * @size: Size to be rounded up.
+ *
+ * Returns @size.
+ *
+ * Since SLOB does not round up, this function simply returns @size.
+ */
+static inline int tomoyo_round2(size_t size)
+{
+       return size;
+}
+
+#else
+
+/**
+ * tomoyo_round2 - Round up to power of 2 for calculating memory usage.
+ *
+ * @size: Size to be rounded up.
+ *
+ * Returns rounded size.
+ *
+ * Strictly speaking, SLAB may be able to allocate (e.g.) 96 bytes instead of
+ * (e.g.) 128 bytes.
+ */
+static inline int tomoyo_round2(size_t size)
 {
-       return p1->values[0] == p2->values[0] && p1->values[1] == p2->values[1]
-               && p1->group == p2->group && p1->min_type == p2->min_type &&
-               p1->max_type == p2->max_type && p1->is_group == p2->is_group;
+#if PAGE_SIZE == 4096
+       size_t bsize = 32;
+#else
+       size_t bsize = 64;
+#endif
+       if (!size)
+               return 0;
+       while (size > bsize)
+               bsize <<= 1;
+       return bsize;
 }
 
+#endif
+
 /**
  * list_for_each_cookie - iterate over a list with cookie.
  * @pos:        the &struct list_head to use as a loop cursor.
diff --git a/security/tomoyo/condition.c b/security/tomoyo/condition.c
new file mode 100644 (file)
index 0000000..8a05f71
--- /dev/null
@@ -0,0 +1,1035 @@
+/*
+ * security/tomoyo/condition.c
+ *
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
+ */
+
+#include "common.h"
+#include <linux/slab.h>
+
+/* List of "struct tomoyo_condition". */
+LIST_HEAD(tomoyo_condition_list);
+
+/**
+ * tomoyo_argv - Check argv[] in "struct linux_binbrm".
+ *
+ * @index:   Index number of @arg_ptr.
+ * @arg_ptr: Contents of argv[@index].
+ * @argc:    Length of @argv.
+ * @argv:    Pointer to "struct tomoyo_argv".
+ * @checked: Set to true if @argv[@index] was found.
+ *
+ * Returns true on success, false otherwise.
+ */
+static bool tomoyo_argv(const unsigned int index, const char *arg_ptr,
+                       const int argc, const struct tomoyo_argv *argv,
+                       u8 *checked)
+{
+       int i;
+       struct tomoyo_path_info arg;
+       arg.name = arg_ptr;
+       for (i = 0; i < argc; argv++, checked++, i++) {
+               bool result;
+               if (index != argv->index)
+                       continue;
+               *checked = 1;
+               tomoyo_fill_path_info(&arg);
+               result = tomoyo_path_matches_pattern(&arg, argv->value);
+               if (argv->is_not)
+                       result = !result;
+               if (!result)
+                       return false;
+       }
+       return true;
+}
+
+/**
+ * tomoyo_envp - Check envp[] in "struct linux_binbrm".
+ *
+ * @env_name:  The name of environment variable.
+ * @env_value: The value of environment variable.
+ * @envc:      Length of @envp.
+ * @envp:      Pointer to "struct tomoyo_envp".
+ * @checked:   Set to true if @envp[@env_name] was found.
+ *
+ * Returns true on success, false otherwise.
+ */
+static bool tomoyo_envp(const char *env_name, const char *env_value,
+                       const int envc, const struct tomoyo_envp *envp,
+                       u8 *checked)
+{
+       int i;
+       struct tomoyo_path_info name;
+       struct tomoyo_path_info value;
+       name.name = env_name;
+       tomoyo_fill_path_info(&name);
+       value.name = env_value;
+       tomoyo_fill_path_info(&value);
+       for (i = 0; i < envc; envp++, checked++, i++) {
+               bool result;
+               if (!tomoyo_path_matches_pattern(&name, envp->name))
+                       continue;
+               *checked = 1;
+               if (envp->value) {
+                       result = tomoyo_path_matches_pattern(&value,
+                                                            envp->value);
+                       if (envp->is_not)
+                               result = !result;
+               } else {
+                       result = true;
+                       if (!envp->is_not)
+                               result = !result;
+               }
+               if (!result)
+                       return false;
+       }
+       return true;
+}
+
/**
 * tomoyo_scan_bprm - Scan "struct linux_binprm".
 *
 * @ee:   Pointer to "struct tomoyo_execve".
 * @argc: Length of @argv.
 * @argv: Pointer to "struct tomoyo_argv".
 * @envc: Length of @envp.
 * @envp: Pointer to "struct tomoyo_envp".
 *
 * Returns true on success, false otherwise.
 *
 * Walks the argv[] and envp[] strings stored in @ee->bprm one page at a
 * time, escaping non-printable bytes, and checks each string against the
 * supplied argv/envp conditions.
 */
static bool tomoyo_scan_bprm(struct tomoyo_execve *ee,
			     const u16 argc, const struct tomoyo_argv *argv,
			     const u16 envc, const struct tomoyo_envp *envp)
{
	struct linux_binprm *bprm = ee->bprm;
	struct tomoyo_page_dump *dump = &ee->dump;
	char *arg_ptr = ee->tmp;
	int arg_len = 0;
	unsigned long pos = bprm->p;
	int offset = pos % PAGE_SIZE;
	int argv_count = bprm->argc;
	int envp_count = bprm->envc;
	bool result = true;
	u8 local_checked[32];
	u8 *checked;
	/* Use the on-stack bitmap when it is large enough; else allocate. */
	if (argc + envc <= sizeof(local_checked)) {
		checked = local_checked;
		memset(local_checked, 0, sizeof(local_checked));
	} else {
		checked = kzalloc(argc + envc, GFP_NOFS);
		if (!checked)
			return false;
	}
	while (argv_count || envp_count) {
		if (!tomoyo_dump_page(bprm, pos, dump)) {
			result = false;
			goto out;
		}
		pos += PAGE_SIZE - offset;
		while (offset < PAGE_SIZE) {
			/* Read. */
			const char *kaddr = dump->data;
			const unsigned char c = kaddr[offset++];
			/*
			 * Copy one byte, escaping '\\' and non-printable
			 * bytes as \ooo octal; leave room (10 bytes) for a
			 * worst-case escape plus terminator.
			 */
			if (c && arg_len < TOMOYO_EXEC_TMPSIZE - 10) {
				if (c == '\\') {
					arg_ptr[arg_len++] = '\\';
					arg_ptr[arg_len++] = '\\';
				} else if (c > ' ' && c < 127) {
					arg_ptr[arg_len++] = c;
				} else {
					arg_ptr[arg_len++] = '\\';
					arg_ptr[arg_len++] = (c >> 6) + '0';
					arg_ptr[arg_len++] =
						((c >> 3) & 7) + '0';
					arg_ptr[arg_len++] = (c & 7) + '0';
				}
			} else {
				arg_ptr[arg_len] = '\0';
			}
			if (c)
				continue;
			/* Check. A NUL byte ends one argv/envp string. */
			if (argv_count) {
				if (!tomoyo_argv(bprm->argc - argv_count,
						 arg_ptr, argc, argv,
						 checked)) {
					result = false;
					break;
				}
				argv_count--;
			} else if (envp_count) {
				/* envp strings have the form name=value. */
				char *cp = strchr(arg_ptr, '=');
				if (cp) {
					*cp = '\0';
					if (!tomoyo_envp(arg_ptr, cp + 1,
							 envc, envp,
							 checked + argc)) {
						result = false;
						break;
					}
				}
				envp_count--;
			} else {
				break;
			}
			arg_len = 0;
		}
		offset = 0;
		if (!result)
			break;
	}
out:
	if (result) {
		int i;
		/* Check not-yet-checked entries. */
		for (i = 0; i < argc; i++) {
			if (checked[i])
				continue;
			/*
			 * Return true only if all unchecked indexes in
			 * bprm->argv[] are not matched.
			 */
			if (argv[i].is_not)
				continue;
			result = false;
			break;
		}
		for (i = 0; i < envc; envp++, i++) {
			if (checked[argc + i])
				continue;
			/*
			 * Return true only if all unchecked environ variables
			 * in bprm->envp[] are either undefined or not matched.
			 */
			if ((!envp->value && !envp->is_not) ||
			    (envp->value && envp->is_not))
				continue;
			result = false;
			break;
		}
	}
	if (checked != local_checked)
		kfree(checked);
	return result;
}
+
/**
 * tomoyo_scan_exec_realpath - Check "exec.realpath" parameter of "struct tomoyo_condition".
 *
 * @file:  Pointer to "struct file". Maybe NULL.
 * @ptr:   Pointer to "struct tomoyo_name_union".
 * @match: True if "exec.realpath=", false if "exec.realpath!=".
 *
 * Returns true on success, false otherwise.
 */
static bool tomoyo_scan_exec_realpath(struct file *file,
				      const struct tomoyo_name_union *ptr,
				      const bool match)
{
	bool result;
	struct tomoyo_path_info exe;
	if (!file)
		return false;
	/* tomoyo_realpath_from_path() allocates; freed below. */
	exe.name = tomoyo_realpath_from_path(&file->f_path);
	if (!exe.name)
		return false;
	tomoyo_fill_path_info(&exe);
	result = tomoyo_compare_name_union(&exe, ptr);
	kfree(exe.name);
	/* "=" wants a match, "!=" wants a mismatch. */
	return result == match;
}
+
/**
 * tomoyo_get_dqword - tomoyo_get_name() for a quoted string.
 *
 * @start: String to save. Modified in place (surrounding quotes stripped).
 *
 * Returns pointer to "struct tomoyo_path_info" on success, NULL otherwise.
 *
 * @start must be of the form "value" (enclosed in double quotes).
 */
static const struct tomoyo_path_info *tomoyo_get_dqword(char *start)
{
	const size_t len = strlen(start);

	/*
	 * Require at least two characters so that the opening and closing
	 * quotes are distinct bytes. Checking the length first also avoids
	 * forming an out-of-bounds pointer (start + len - 1) when @start is
	 * an empty string.
	 */
	if (len < 2 || start[0] != '"' || start[len - 1] != '"')
		return NULL;
	start[len - 1] = '\0';
	start++;
	if (*start && !tomoyo_correct_word(start))
		return NULL;
	return tomoyo_get_name(start);
}
+
+/**
+ * tomoyo_parse_name_union_quoted - Parse a quoted word.
+ *
+ * @param: Pointer to "struct tomoyo_acl_param".
+ * @ptr:   Pointer to "struct tomoyo_name_union".
+ *
+ * Returns true on success, false otherwise.
+ */
+static bool tomoyo_parse_name_union_quoted(struct tomoyo_acl_param *param,
+                                          struct tomoyo_name_union *ptr)
+{
+       char *filename = param->data;
+       if (*filename == '@')
+               return tomoyo_parse_name_union(param, ptr);
+       ptr->filename = tomoyo_get_dqword(filename);
+       return ptr->filename != NULL;
+}
+
+/**
+ * tomoyo_parse_argv - Parse an argv[] condition part.
+ *
+ * @left:  Lefthand value.
+ * @right: Righthand value.
+ * @argv:  Pointer to "struct tomoyo_argv".
+ *
+ * Returns true on success, false otherwise.
+ */
+static bool tomoyo_parse_argv(char *left, char *right,
+                             struct tomoyo_argv *argv)
+{
+       if (tomoyo_parse_ulong(&argv->index, &left) !=
+           TOMOYO_VALUE_TYPE_DECIMAL || *left++ != ']' || *left)
+               return false;
+       argv->value = tomoyo_get_dqword(right);
+       return argv->value != NULL;
+}
+
+/**
+ * tomoyo_parse_envp - Parse an envp[] condition part.
+ *
+ * @left:  Lefthand value.
+ * @right: Righthand value.
+ * @envp:  Pointer to "struct tomoyo_envp".
+ *
+ * Returns true on success, false otherwise.
+ */
+static bool tomoyo_parse_envp(char *left, char *right,
+                             struct tomoyo_envp *envp)
+{
+       const struct tomoyo_path_info *name;
+       const struct tomoyo_path_info *value;
+       char *cp = left + strlen(left) - 1;
+       if (*cp-- != ']' || *cp != '"')
+               goto out;
+       *cp = '\0';
+       if (!tomoyo_correct_word(left))
+               goto out;
+       name = tomoyo_get_name(left);
+       if (!name)
+               goto out;
+       if (!strcmp(right, "NULL")) {
+               value = NULL;
+       } else {
+               value = tomoyo_get_dqword(right);
+               if (!value) {
+                       tomoyo_put_name(name);
+                       goto out;
+               }
+       }
+       envp->name = name;
+       envp->value = value;
+       return true;
+out:
+       return false;
+}
+
+/**
+ * tomoyo_same_condition - Check for duplicated "struct tomoyo_condition" entry.
+ *
+ * @a: Pointer to "struct tomoyo_condition".
+ * @b: Pointer to "struct tomoyo_condition".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
+static inline bool tomoyo_same_condition(const struct tomoyo_condition *a,
+                                        const struct tomoyo_condition *b)
+{
+       return a->size == b->size && a->condc == b->condc &&
+               a->numbers_count == b->numbers_count &&
+               a->names_count == b->names_count &&
+               a->argc == b->argc && a->envc == b->envc &&
+               !memcmp(a + 1, b + 1, a->size - sizeof(*a));
+}
+
+/**
+ * tomoyo_condition_type - Get condition type.
+ *
+ * @word: Keyword string.
+ *
+ * Returns one of values in "enum tomoyo_conditions_index" on success,
+ * TOMOYO_MAX_CONDITION_KEYWORD otherwise.
+ */
+static u8 tomoyo_condition_type(const char *word)
+{
+       u8 i;
+       for (i = 0; i < TOMOYO_MAX_CONDITION_KEYWORD; i++) {
+               if (!strcmp(word, tomoyo_condition_keyword[i]))
+                       break;
+       }
+       return i;
+}
+
/* Define this to enable debug mode. */
/* #define DEBUG_CONDITION */

#ifdef DEBUG_CONDITION
#define dprintk printk
#else
/* Compiled out entirely unless DEBUG_CONDITION is defined. */
#define dprintk(...) do { } while (0)
#endif
+
/**
 * tomoyo_commit_condition - Commit "struct tomoyo_condition".
 *
 * @entry: Pointer to "struct tomoyo_condition".
 *
 * Returns pointer to "struct tomoyo_condition" on success, NULL otherwise.
 *
 * This function merges duplicated entries. This function returns NULL if
 * @entry is not duplicated but memory quota for policy has exceeded.
 */
static struct tomoyo_condition *tomoyo_commit_condition
(struct tomoyo_condition *entry)
{
	struct tomoyo_condition *ptr;
	bool found = false;

	if (mutex_lock_interruptible(&tomoyo_policy_lock)) {
		dprintk(KERN_WARNING "%u: %s failed\n", __LINE__, __func__);
		/* found == true with ptr == NULL means "discard @entry". */
		ptr = NULL;
		found = true;
		goto out;
	}
	list_for_each_entry_rcu(ptr, &tomoyo_condition_list, head.list) {
		if (!tomoyo_same_condition(ptr, entry))
			continue;
		/* Same entry found. Share this entry. */
		atomic_inc(&ptr->head.users);
		found = true;
		break;
	}
	if (!found) {
		if (tomoyo_memory_ok(entry)) {
			atomic_set(&entry->head.users, 1);
			list_add_rcu(&entry->head.list,
				     &tomoyo_condition_list);
		} else {
			/* Memory quota exceeded: drop @entry, return NULL. */
			found = true;
			ptr = NULL;
		}
	}
	mutex_unlock(&tomoyo_policy_lock);
out:
	if (found) {
		/* @entry is a duplicate or unusable; release it. */
		tomoyo_del_condition(&entry->head.list);
		kfree(entry);
		entry = ptr;
	}
	return entry;
}
+
/**
 * tomoyo_get_condition - Parse condition part.
 *
 * @param: Pointer to "struct tomoyo_acl_param".
 *
 * Returns pointer to "struct tomoyo_condition" on success, NULL otherwise.
 *
 * Runs two passes over the same string: a dry run that only counts how many
 * elements of each kind are present, then (after allocating one block large
 * enough for all of them) a second pass that fills the block in. The
 * operators overwritten with '\0' during the dry run are restored before
 * the second pass.
 */
struct tomoyo_condition *tomoyo_get_condition(struct tomoyo_acl_param *param)
{
	struct tomoyo_condition *entry = NULL;
	struct tomoyo_condition_element *condp = NULL;
	struct tomoyo_number_union *numbers_p = NULL;
	struct tomoyo_name_union *names_p = NULL;
	struct tomoyo_argv *argv = NULL;
	struct tomoyo_envp *envp = NULL;
	struct tomoyo_condition e = { };
	char * const start_of_string = param->data;
	char * const end_of_string = start_of_string + strlen(start_of_string);
	char *pos;
rerun:
	pos = start_of_string;
	while (1) {
		u8 left = -1;
		u8 right = -1;
		char *left_word = pos;
		char *cp;
		char *right_word;
		bool is_not;
		if (!*left_word)
			break;
		/*
		 * Since left-hand condition does not allow use of "path_group"
		 * or "number_group" and environment variable's names do not
		 * accept '=', it is guaranteed that the original line consists
		 * of one or more repetition of $left$operator$right blocks
		 * where "$left is free from '=' and ' '" and "$operator is
		 * either '=' or '!='" and "$right is free from ' '".
		 * Therefore, we can reconstruct the original line at the end
		 * of dry run even if we overwrite $operator with '\0'.
		 */
		cp = strchr(pos, ' ');
		if (cp) {
			*cp = '\0'; /* Will restore later. */
			pos = cp + 1;
		} else {
			pos = "";
		}
		right_word = strchr(left_word, '=');
		if (!right_word || right_word == left_word)
			goto out;
		is_not = *(right_word - 1) == '!';
		if (is_not)
			*(right_word++ - 1) = '\0'; /* Will restore later. */
		else if (*(right_word + 1) != '=')
			*right_word++ = '\0'; /* Will restore later. */
		else
			goto out;
		dprintk(KERN_WARNING "%u: <%s>%s=<%s>\n", __LINE__, left_word,
			is_not ? "!" : "", right_word);
		if (!strncmp(left_word, "exec.argv[", 10)) {
			if (!argv) {
				/* Dry run: just count. */
				e.argc++;
				e.condc++;
			} else {
				/* Second pass: consume one counted slot. */
				e.argc--;
				e.condc--;
				left = TOMOYO_ARGV_ENTRY;
				argv->is_not = is_not;
				if (!tomoyo_parse_argv(left_word + 10,
						       right_word, argv++))
					goto out;
			}
			goto store_value;
		}
		if (!strncmp(left_word, "exec.envp[\"", 11)) {
			if (!envp) {
				e.envc++;
				e.condc++;
			} else {
				e.envc--;
				e.condc--;
				left = TOMOYO_ENVP_ENTRY;
				envp->is_not = is_not;
				if (!tomoyo_parse_envp(left_word + 11,
						       right_word, envp++))
					goto out;
			}
			goto store_value;
		}
		left = tomoyo_condition_type(left_word);
		dprintk(KERN_WARNING "%u: <%s> left=%u\n", __LINE__, left_word,
			left);
		if (left == TOMOYO_MAX_CONDITION_KEYWORD) {
			/* Not a keyword: a numeric value or "@number_group". */
			if (!numbers_p) {
				e.numbers_count++;
			} else {
				e.numbers_count--;
				left = TOMOYO_NUMBER_UNION;
				param->data = left_word;
				/* "@group" is not allowed on the left side. */
				if (*left_word == '@' ||
				    !tomoyo_parse_number_union(param,
							       numbers_p++))
					goto out;
			}
		}
		if (!condp)
			e.condc++;
		else
			e.condc--;
		if (left == TOMOYO_EXEC_REALPATH ||
		    left == TOMOYO_SYMLINK_TARGET) {
			if (!names_p) {
				e.names_count++;
			} else {
				e.names_count--;
				right = TOMOYO_NAME_UNION;
				param->data = right_word;
				if (!tomoyo_parse_name_union_quoted(param,
								    names_p++))
					goto out;
			}
			goto store_value;
		}
		right = tomoyo_condition_type(right_word);
		if (right == TOMOYO_MAX_CONDITION_KEYWORD) {
			if (!numbers_p) {
				e.numbers_count++;
			} else {
				e.numbers_count--;
				right = TOMOYO_NUMBER_UNION;
				param->data = right_word;
				if (!tomoyo_parse_number_union(param,
							       numbers_p++))
					goto out;
			}
		}
store_value:
		if (!condp) {
			dprintk(KERN_WARNING "%u: dry_run left=%u right=%u "
				"match=%u\n", __LINE__, left, right, !is_not);
			continue;
		}
		condp->left = left;
		condp->right = right;
		condp->equals = !is_not;
		dprintk(KERN_WARNING "%u: left=%u right=%u match=%u\n",
			__LINE__, condp->left, condp->right,
			condp->equals);
		condp++;
	}
	dprintk(KERN_INFO "%u: cond=%u numbers=%u names=%u ac=%u ec=%u\n",
		__LINE__, e.condc, e.numbers_count, e.names_count, e.argc,
		e.envc);
	if (entry) {
		/* Second pass done: every counter must be back to zero. */
		BUG_ON(e.names_count | e.numbers_count | e.argc | e.envc |
		       e.condc);
		return tomoyo_commit_condition(entry);
	}
	/* Dry run done: allocate one block holding all element arrays. */
	e.size = sizeof(*entry)
		+ e.condc * sizeof(struct tomoyo_condition_element)
		+ e.numbers_count * sizeof(struct tomoyo_number_union)
		+ e.names_count * sizeof(struct tomoyo_name_union)
		+ e.argc * sizeof(struct tomoyo_argv)
		+ e.envc * sizeof(struct tomoyo_envp);
	entry = kzalloc(e.size, GFP_NOFS);
	if (!entry)
		return NULL;
	*entry = e;
	/* The element arrays live back to back after the header. */
	condp = (struct tomoyo_condition_element *) (entry + 1);
	numbers_p = (struct tomoyo_number_union *) (condp + e.condc);
	names_p = (struct tomoyo_name_union *) (numbers_p + e.numbers_count);
	argv = (struct tomoyo_argv *) (names_p + e.names_count);
	envp = (struct tomoyo_envp *) (argv + e.argc);
	{
		/* Restore the '\0' bytes written during the dry run. */
		bool flag = false;
		for (pos = start_of_string; pos < end_of_string; pos++) {
			if (*pos)
				continue;
			if (flag) /* Restore " ". */
				*pos = ' ';
			else if (*(pos + 1) == '=') /* Restore "!=". */
				*pos = '!';
			else /* Restore "=". */
				*pos = '=';
			flag = !flag;
		}
	}
	goto rerun;
out:
	dprintk(KERN_WARNING "%u: %s failed\n", __LINE__, __func__);
	if (entry) {
		tomoyo_del_condition(&entry->head.list);
		kfree(entry);
	}
	return NULL;
}
+
/**
 * tomoyo_get_attributes - Revalidate "struct inode".
 *
 * @obj: Pointer to "struct tomoyo_obj_info".
 *
 * Returns nothing.
 *
 * Snapshots inode attributes for path1/path2 and their parent directories
 * into @obj->stat[], marking each slot valid as it is filled.
 */
void tomoyo_get_attributes(struct tomoyo_obj_info *obj)
{
	u8 i;
	struct dentry *dentry = NULL;

	for (i = 0; i < TOMOYO_MAX_PATH_STAT; i++) {
		struct inode *inode;
		switch (i) {
		case TOMOYO_PATH1:
			dentry = obj->path1.dentry;
			if (!dentry)
				continue;
			break;
		case TOMOYO_PATH2:
			dentry = obj->path2.dentry;
			if (!dentry)
				continue;
			break;
		default:
			/*
			 * Parent slots: take a reference on the parent of the
			 * dentry from the previous iteration (released below).
			 */
			if (!dentry)
				continue;
			dentry = dget_parent(dentry);
			break;
		}
		inode = dentry->d_inode;
		if (inode) {
			struct tomoyo_mini_stat *stat = &obj->stat[i];
			stat->uid  = inode->i_uid;
			stat->gid  = inode->i_gid;
			stat->ino  = inode->i_ino;
			stat->mode = inode->i_mode;
			stat->dev  = inode->i_sb->s_dev;
			stat->rdev = inode->i_rdev;
			obj->stat_valid[i] = true;
		}
		if (i & 1) /* i == TOMOYO_PATH1_PARENT ||
			      i == TOMOYO_PATH2_PARENT */
			dput(dentry);
	}
}
+
+/**
+ * tomoyo_condition - Check condition part.
+ *
+ * @r:    Pointer to "struct tomoyo_request_info".
+ * @cond: Pointer to "struct tomoyo_condition". Maybe NULL.
+ *
+ * Returns true on success, false otherwise.
+ *
+ * Caller holds tomoyo_read_lock().
+ */
+bool tomoyo_condition(struct tomoyo_request_info *r,
+                     const struct tomoyo_condition *cond)
+{
+       u32 i;
+       unsigned long min_v[2] = { 0, 0 };
+       unsigned long max_v[2] = { 0, 0 };
+       const struct tomoyo_condition_element *condp;
+       const struct tomoyo_number_union *numbers_p;
+       const struct tomoyo_name_union *names_p;
+       const struct tomoyo_argv *argv;
+       const struct tomoyo_envp *envp;
+       struct tomoyo_obj_info *obj;
+       u16 condc;
+       u16 argc;
+       u16 envc;
+       struct linux_binprm *bprm = NULL;
+       if (!cond)
+               return true;
+       condc = cond->condc;
+       argc = cond->argc;
+       envc = cond->envc;
+       obj = r->obj;
+       if (r->ee)
+               bprm = r->ee->bprm;
+       if (!bprm && (argc || envc))
+               return false;
+       condp = (struct tomoyo_condition_element *) (cond + 1);
+       numbers_p = (const struct tomoyo_number_union *) (condp + condc);
+       names_p = (const struct tomoyo_name_union *)
+               (numbers_p + cond->numbers_count);
+       argv = (const struct tomoyo_argv *) (names_p + cond->names_count);
+       envp = (const struct tomoyo_envp *) (argv + argc);
+       for (i = 0; i < condc; i++) {
+               const bool match = condp->equals;
+               const u8 left = condp->left;
+               const u8 right = condp->right;
+               bool is_bitop[2] = { false, false };
+               u8 j;
+               condp++;
+               /* Check argv[] and envp[] later. */
+               if (left == TOMOYO_ARGV_ENTRY || left == TOMOYO_ENVP_ENTRY)
+                       continue;
+               /* Check string expressions. */
+               if (right == TOMOYO_NAME_UNION) {
+                       const struct tomoyo_name_union *ptr = names_p++;
+                       switch (left) {
+                               struct tomoyo_path_info *symlink;
+                               struct tomoyo_execve *ee;
+                               struct file *file;
+                       case TOMOYO_SYMLINK_TARGET:
+                               symlink = obj ? obj->symlink_target : NULL;
+                               if (!symlink ||
+                                   !tomoyo_compare_name_union(symlink, ptr)
+                                   == match)
+                                       goto out;
+                               break;
+                       case TOMOYO_EXEC_REALPATH:
+                               ee = r->ee;
+                               file = ee ? ee->bprm->file : NULL;
+                               if (!tomoyo_scan_exec_realpath(file, ptr,
+                                                              match))
+                                       goto out;
+                               break;
+                       }
+                       continue;
+               }
+               /* Check numeric or bit-op expressions. */
+               for (j = 0; j < 2; j++) {
+                       const u8 index = j ? right : left;
+                       unsigned long value = 0;
+                       switch (index) {
+                       case TOMOYO_TASK_UID:
+                               value = current_uid();
+                               break;
+                       case TOMOYO_TASK_EUID:
+                               value = current_euid();
+                               break;
+                       case TOMOYO_TASK_SUID:
+                               value = current_suid();
+                               break;
+                       case TOMOYO_TASK_FSUID:
+                               value = current_fsuid();
+                               break;
+                       case TOMOYO_TASK_GID:
+                               value = current_gid();
+                               break;
+                       case TOMOYO_TASK_EGID:
+                               value = current_egid();
+                               break;
+                       case TOMOYO_TASK_SGID:
+                               value = current_sgid();
+                               break;
+                       case TOMOYO_TASK_FSGID:
+                               value = current_fsgid();
+                               break;
+                       case TOMOYO_TASK_PID:
+                               value = tomoyo_sys_getpid();
+                               break;
+                       case TOMOYO_TASK_PPID:
+                               value = tomoyo_sys_getppid();
+                               break;
+                       case TOMOYO_TYPE_IS_SOCKET:
+                               value = S_IFSOCK;
+                               break;
+                       case TOMOYO_TYPE_IS_SYMLINK:
+                               value = S_IFLNK;
+                               break;
+                       case TOMOYO_TYPE_IS_FILE:
+                               value = S_IFREG;
+                               break;
+                       case TOMOYO_TYPE_IS_BLOCK_DEV:
+                               value = S_IFBLK;
+                               break;
+                       case TOMOYO_TYPE_IS_DIRECTORY:
+                               value = S_IFDIR;
+                               break;
+                       case TOMOYO_TYPE_IS_CHAR_DEV:
+                               value = S_IFCHR;
+                               break;
+                       case TOMOYO_TYPE_IS_FIFO:
+                               value = S_IFIFO;
+                               break;
+                       case TOMOYO_MODE_SETUID:
+                               value = S_ISUID;
+                               break;
+                       case TOMOYO_MODE_SETGID:
+                               value = S_ISGID;
+                               break;
+                       case TOMOYO_MODE_STICKY:
+                               value = S_ISVTX;
+                               break;
+                       case TOMOYO_MODE_OWNER_READ:
+                               value = S_IRUSR;
+                               break;
+                       case TOMOYO_MODE_OWNER_WRITE:
+                               value = S_IWUSR;
+                               break;
+                       case TOMOYO_MODE_OWNER_EXECUTE:
+                               value = S_IXUSR;
+                               break;
+                       case TOMOYO_MODE_GROUP_READ:
+                               value = S_IRGRP;
+                               break;
+                       case TOMOYO_MODE_GROUP_WRITE:
+                               value = S_IWGRP;
+                               break;
+                       case TOMOYO_MODE_GROUP_EXECUTE:
+                               value = S_IXGRP;
+                               break;
+                       case TOMOYO_MODE_OTHERS_READ:
+                               value = S_IROTH;
+                               break;
+                       case TOMOYO_MODE_OTHERS_WRITE:
+                               value = S_IWOTH;
+                               break;
+                       case TOMOYO_MODE_OTHERS_EXECUTE:
+                               value = S_IXOTH;
+                               break;
+                       case TOMOYO_EXEC_ARGC:
+                               if (!bprm)
+                                       goto out;
+                               value = bprm->argc;
+                               break;
+                       case TOMOYO_EXEC_ENVC:
+                               if (!bprm)
+                                       goto out;
+                               value = bprm->envc;
+                               break;
+                       case TOMOYO_NUMBER_UNION:
+                               /* Fetch values later. */
+                               break;
+                       default:
+                               if (!obj)
+                                       goto out;
+                               if (!obj->validate_done) {
+                                       tomoyo_get_attributes(obj);
+                                       obj->validate_done = true;
+                               }
+                               {
+                                       u8 stat_index;
+                                       struct tomoyo_mini_stat *stat;
+                                       switch (index) {
+                                       case TOMOYO_PATH1_UID:
+                                       case TOMOYO_PATH1_GID:
+                                       case TOMOYO_PATH1_INO:
+                                       case TOMOYO_PATH1_MAJOR:
+                                       case TOMOYO_PATH1_MINOR:
+                                       case TOMOYO_PATH1_TYPE:
+                                       case TOMOYO_PATH1_DEV_MAJOR:
+                                       case TOMOYO_PATH1_DEV_MINOR:
+                                       case TOMOYO_PATH1_PERM:
+                                               stat_index = TOMOYO_PATH1;
+                                               break;
+                                       case TOMOYO_PATH2_UID:
+                                       case TOMOYO_PATH2_GID:
+                                       case TOMOYO_PATH2_INO:
+                                       case TOMOYO_PATH2_MAJOR:
+                                       case TOMOYO_PATH2_MINOR:
+                                       case TOMOYO_PATH2_TYPE:
+                                       case TOMOYO_PATH2_DEV_MAJOR:
+                                       case TOMOYO_PATH2_DEV_MINOR:
+                                       case TOMOYO_PATH2_PERM:
+                                               stat_index = TOMOYO_PATH2;
+                                               break;
+                                       case TOMOYO_PATH1_PARENT_UID:
+                                       case TOMOYO_PATH1_PARENT_GID:
+                                       case TOMOYO_PATH1_PARENT_INO:
+                                       case TOMOYO_PATH1_PARENT_PERM:
+                                               stat_index =
+                                                       TOMOYO_PATH1_PARENT;
+                                               break;
+                                       case TOMOYO_PATH2_PARENT_UID:
+                                       case TOMOYO_PATH2_PARENT_GID:
+                                       case TOMOYO_PATH2_PARENT_INO:
+                                       case TOMOYO_PATH2_PARENT_PERM:
+                                               stat_index =
+                                                       TOMOYO_PATH2_PARENT;
+                                               break;
+                                       default:
+                                               goto out;
+                                       }
+                                       if (!obj->stat_valid[stat_index])
+                                               goto out;
+                                       stat = &obj->stat[stat_index];
+                                       switch (index) {
+                                       case TOMOYO_PATH1_UID:
+                                       case TOMOYO_PATH2_UID:
+                                       case TOMOYO_PATH1_PARENT_UID:
+                                       case TOMOYO_PATH2_PARENT_UID:
+                                               value = stat->uid;
+                                               break;
+                                       case TOMOYO_PATH1_GID:
+                                       case TOMOYO_PATH2_GID:
+                                       case TOMOYO_PATH1_PARENT_GID:
+                                       case TOMOYO_PATH2_PARENT_GID:
+                                               value = stat->gid;
+                                               break;
+                                       case TOMOYO_PATH1_INO:
+                                       case TOMOYO_PATH2_INO:
+                                       case TOMOYO_PATH1_PARENT_INO:
+                                       case TOMOYO_PATH2_PARENT_INO:
+                                               value = stat->ino;
+                                               break;
+                                       case TOMOYO_PATH1_MAJOR:
+                                       case TOMOYO_PATH2_MAJOR:
+                                               value = MAJOR(stat->dev);
+                                               break;
+                                       case TOMOYO_PATH1_MINOR:
+                                       case TOMOYO_PATH2_MINOR:
+                                               value = MINOR(stat->dev);
+                                               break;
+                                       case TOMOYO_PATH1_TYPE:
+                                       case TOMOYO_PATH2_TYPE:
+                                               value = stat->mode & S_IFMT;
+                                               break;
+                                       case TOMOYO_PATH1_DEV_MAJOR:
+                                       case TOMOYO_PATH2_DEV_MAJOR:
+                                               value = MAJOR(stat->rdev);
+                                               break;
+                                       case TOMOYO_PATH1_DEV_MINOR:
+                                       case TOMOYO_PATH2_DEV_MINOR:
+                                               value = MINOR(stat->rdev);
+                                               break;
+                                       case TOMOYO_PATH1_PERM:
+                                       case TOMOYO_PATH2_PERM:
+                                       case TOMOYO_PATH1_PARENT_PERM:
+                                       case TOMOYO_PATH2_PARENT_PERM:
+                                               value = stat->mode & S_IALLUGO;
+                                               break;
+                                       }
+                               }
+                               break;
+                       }
+                       max_v[j] = value;
+                       min_v[j] = value;
+                       switch (index) {
+                       case TOMOYO_MODE_SETUID:
+                       case TOMOYO_MODE_SETGID:
+                       case TOMOYO_MODE_STICKY:
+                       case TOMOYO_MODE_OWNER_READ:
+                       case TOMOYO_MODE_OWNER_WRITE:
+                       case TOMOYO_MODE_OWNER_EXECUTE:
+                       case TOMOYO_MODE_GROUP_READ:
+                       case TOMOYO_MODE_GROUP_WRITE:
+                       case TOMOYO_MODE_GROUP_EXECUTE:
+                       case TOMOYO_MODE_OTHERS_READ:
+                       case TOMOYO_MODE_OTHERS_WRITE:
+                       case TOMOYO_MODE_OTHERS_EXECUTE:
+                               is_bitop[j] = true;
+                       }
+               }
+               if (left == TOMOYO_NUMBER_UNION) {
+                       /* Fetch values now. */
+                       const struct tomoyo_number_union *ptr = numbers_p++;
+                       min_v[0] = ptr->values[0];
+                       max_v[0] = ptr->values[1];
+               }
+               if (right == TOMOYO_NUMBER_UNION) {
+                       /* Fetch values now. */
+                       const struct tomoyo_number_union *ptr = numbers_p++;
+                       if (ptr->group) {
+                               if (tomoyo_number_matches_group(min_v[0],
+                                                               max_v[0],
+                                                               ptr->group)
+                                   == match)
+                                       continue;
+                       } else {
+                               if ((min_v[0] <= ptr->values[1] &&
+                                    max_v[0] >= ptr->values[0]) == match)
+                                       continue;
+                       }
+                       goto out;
+               }
+               /*
+                * Bit operation is valid only when counterpart value
+                * represents permission.
+                */
+               if (is_bitop[0] && is_bitop[1]) {
+                       goto out;
+               } else if (is_bitop[0]) {
+                       switch (right) {
+                       case TOMOYO_PATH1_PERM:
+                       case TOMOYO_PATH1_PARENT_PERM:
+                       case TOMOYO_PATH2_PERM:
+                       case TOMOYO_PATH2_PARENT_PERM:
+                               if (!(max_v[0] & max_v[1]) == !match)
+                                       continue;
+                       }
+                       goto out;
+               } else if (is_bitop[1]) {
+                       switch (left) {
+                       case TOMOYO_PATH1_PERM:
+                       case TOMOYO_PATH1_PARENT_PERM:
+                       case TOMOYO_PATH2_PERM:
+                       case TOMOYO_PATH2_PARENT_PERM:
+                               if (!(max_v[0] & max_v[1]) == !match)
+                                       continue;
+                       }
+                       goto out;
+               }
+               /* Normal value range comparison. */
+               if ((min_v[0] <= max_v[1] && max_v[0] >= min_v[1]) == match)
+                       continue;
+out:
+               return false;
+       }
+       /* Check argv[] and envp[] now. */
+       if (r->ee && (argc || envc))
+               return tomoyo_scan_bprm(r->ee, argc, argv, envc, envp);
+       return true;
+}
index 3538840..cd0f92d 100644 (file)
@@ -1,9 +1,7 @@
 /*
  * security/tomoyo/domain.c
  *
- * Domain transition functions for TOMOYO.
- *
- * Copyright (C) 2005-2010  NTT DATA CORPORATION
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
  */
 
 #include "common.h"
@@ -20,8 +18,7 @@ struct tomoyo_domain_info tomoyo_kernel_domain;
  *
  * @new_entry:       Pointer to "struct tomoyo_acl_info".
  * @size:            Size of @new_entry in bytes.
- * @is_delete:       True if it is a delete request.
- * @list:            Pointer to "struct list_head".
+ * @param:           Pointer to "struct tomoyo_acl_param".
  * @check_duplicate: Callback function to find duplicated entry.
  *
  * Returns 0 on success, negative value otherwise.
@@ -29,25 +26,26 @@ struct tomoyo_domain_info tomoyo_kernel_domain;
  * Caller holds tomoyo_read_lock().
  */
 int tomoyo_update_policy(struct tomoyo_acl_head *new_entry, const int size,
-                        bool is_delete, struct list_head *list,
+                        struct tomoyo_acl_param *param,
                         bool (*check_duplicate) (const struct tomoyo_acl_head
                                                  *,
                                                  const struct tomoyo_acl_head
                                                  *))
 {
-       int error = is_delete ? -ENOENT : -ENOMEM;
+       int error = param->is_delete ? -ENOENT : -ENOMEM;
        struct tomoyo_acl_head *entry;
+       struct list_head *list = param->list;
 
        if (mutex_lock_interruptible(&tomoyo_policy_lock))
                return -ENOMEM;
        list_for_each_entry_rcu(entry, list, list) {
                if (!check_duplicate(entry, new_entry))
                        continue;
-               entry->is_deleted = is_delete;
+               entry->is_deleted = param->is_delete;
                error = 0;
                break;
        }
-       if (error && !is_delete) {
+       if (error && !param->is_delete) {
                entry = tomoyo_commit_ok(new_entry, size);
                if (entry) {
                        list_add_tail_rcu(&entry->list, list);
@@ -59,12 +57,25 @@ int tomoyo_update_policy(struct tomoyo_acl_head *new_entry, const int size,
 }
 
 /**
+ * tomoyo_same_acl_head - Check for duplicated "struct tomoyo_acl_info" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_info".
+ * @b: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
+static inline bool tomoyo_same_acl_head(const struct tomoyo_acl_info *a,
+                                       const struct tomoyo_acl_info *b)
+{
+       return a->type == b->type && a->cond == b->cond;
+}
+
+/**
  * tomoyo_update_domain - Update an entry for domain policy.
  *
  * @new_entry:       Pointer to "struct tomoyo_acl_info".
  * @size:            Size of @new_entry in bytes.
- * @is_delete:       True if it is a delete request.
- * @domain:          Pointer to "struct tomoyo_domain_info".
+ * @param:           Pointer to "struct tomoyo_acl_param".
  * @check_duplicate: Callback function to find duplicated entry.
  * @merge_duplicate: Callback function to merge duplicated entry.
  *
@@ -73,7 +84,7 @@ int tomoyo_update_policy(struct tomoyo_acl_head *new_entry, const int size,
  * Caller holds tomoyo_read_lock().
  */
 int tomoyo_update_domain(struct tomoyo_acl_info *new_entry, const int size,
-                        bool is_delete, struct tomoyo_domain_info *domain,
+                        struct tomoyo_acl_param *param,
                         bool (*check_duplicate) (const struct tomoyo_acl_info
                                                  *,
                                                  const struct tomoyo_acl_info
@@ -82,13 +93,21 @@ int tomoyo_update_domain(struct tomoyo_acl_info *new_entry, const int size,
                                                  struct tomoyo_acl_info *,
                                                  const bool))
 {
+       const bool is_delete = param->is_delete;
        int error = is_delete ? -ENOENT : -ENOMEM;
        struct tomoyo_acl_info *entry;
+       struct list_head * const list = param->list;
 
+       if (param->data[0]) {
+               new_entry->cond = tomoyo_get_condition(param);
+               if (!new_entry->cond)
+                       return -EINVAL;
+       }
        if (mutex_lock_interruptible(&tomoyo_policy_lock))
-               return error;
-       list_for_each_entry_rcu(entry, &domain->acl_info_list, list) {
-               if (!check_duplicate(entry, new_entry))
+               goto out;
+       list_for_each_entry_rcu(entry, list, list) {
+               if (!tomoyo_same_acl_head(entry, new_entry) ||
+                   !check_duplicate(entry, new_entry))
                        continue;
                if (merge_duplicate)
                        entry->is_deleted = merge_duplicate(entry, new_entry,
@@ -101,28 +120,50 @@ int tomoyo_update_domain(struct tomoyo_acl_info *new_entry, const int size,
        if (error && !is_delete) {
                entry = tomoyo_commit_ok(new_entry, size);
                if (entry) {
-                       list_add_tail_rcu(&entry->list, &domain->acl_info_list);
+                       list_add_tail_rcu(&entry->list, list);
                        error = 0;
                }
        }
        mutex_unlock(&tomoyo_policy_lock);
+out:
+       tomoyo_put_condition(new_entry->cond);
        return error;
 }
 
+/**
+ * tomoyo_check_acl - Do permission check.
+ *
+ * @r:           Pointer to "struct tomoyo_request_info".
+ * @check_entry: Callback function to check type specific parameters.
+ *
+ * Returns 0 on success, negative value otherwise.
+ *
+ * Caller holds tomoyo_read_lock().
+ */
 void tomoyo_check_acl(struct tomoyo_request_info *r,
                      bool (*check_entry) (struct tomoyo_request_info *,
                                           const struct tomoyo_acl_info *))
 {
        const struct tomoyo_domain_info *domain = r->domain;
        struct tomoyo_acl_info *ptr;
+       bool retried = false;
+       const struct list_head *list = &domain->acl_info_list;
 
-       list_for_each_entry_rcu(ptr, &domain->acl_info_list, list) {
+retry:
+       list_for_each_entry_rcu(ptr, list, list) {
                if (ptr->is_deleted || ptr->type != r->param_type)
                        continue;
-               if (check_entry(r, ptr)) {
-                       r->granted = true;
-                       return;
-               }
+               if (!check_entry(r, ptr))
+                       continue;
+               if (!tomoyo_condition(r, ptr->cond))
+                       continue;
+               r->granted = true;
+               return;
+       }
+       if (!retried) {
+               retried = true;
+               list = &domain->ns->acl_group[domain->group];
+               goto retry;
        }
        r->granted = false;
 }
@@ -130,24 +171,29 @@ void tomoyo_check_acl(struct tomoyo_request_info *r,
 /* The list for "struct tomoyo_domain_info". */
 LIST_HEAD(tomoyo_domain_list);
 
-struct list_head tomoyo_policy_list[TOMOYO_MAX_POLICY];
-struct list_head tomoyo_group_list[TOMOYO_MAX_GROUP];
-
 /**
  * tomoyo_last_word - Get last component of a domainname.
  *
- * @domainname: Domainname to check.
+ * @name: Domainname to check.
  *
  * Returns the last word of @domainname.
  */
 static const char *tomoyo_last_word(const char *name)
 {
-        const char *cp = strrchr(name, ' ');
-        if (cp)
-                return cp + 1;
-        return name;
+       const char *cp = strrchr(name, ' ');
+       if (cp)
+               return cp + 1;
+       return name;
 }
 
+/**
+ * tomoyo_same_transition_control - Check for duplicated "struct tomoyo_transition_control" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_head".
+ * @b: Pointer to "struct tomoyo_acl_head".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
 static bool tomoyo_same_transition_control(const struct tomoyo_acl_head *a,
                                           const struct tomoyo_acl_head *b)
 {
@@ -163,30 +209,36 @@ static bool tomoyo_same_transition_control(const struct tomoyo_acl_head *a,
 }
 
 /**
- * tomoyo_update_transition_control_entry - Update "struct tomoyo_transition_control" list.
+ * tomoyo_write_transition_control - Write "struct tomoyo_transition_control" list.
  *
- * @domainname: The name of domain. Maybe NULL.
- * @program:    The name of program. Maybe NULL.
- * @type:       Type of transition.
- * @is_delete:  True if it is a delete request.
+ * @param: Pointer to "struct tomoyo_acl_param".
+ * @type:  Type of this entry.
  *
  * Returns 0 on success, negative value otherwise.
  */
-static int tomoyo_update_transition_control_entry(const char *domainname,
-                                                 const char *program,
-                                                 const u8 type,
-                                                 const bool is_delete)
+int tomoyo_write_transition_control(struct tomoyo_acl_param *param,
+                                   const u8 type)
 {
        struct tomoyo_transition_control e = { .type = type };
-       int error = is_delete ? -ENOENT : -ENOMEM;
-       if (program) {
+       int error = param->is_delete ? -ENOENT : -ENOMEM;
+       char *program = param->data;
+       char *domainname = strstr(program, " from ");
+       if (domainname) {
+               *domainname = '\0';
+               domainname += 6;
+       } else if (type == TOMOYO_TRANSITION_CONTROL_NO_KEEP ||
+                  type == TOMOYO_TRANSITION_CONTROL_KEEP) {
+               domainname = program;
+               program = NULL;
+       }
+       if (program && strcmp(program, "any")) {
                if (!tomoyo_correct_path(program))
                        return -EINVAL;
                e.program = tomoyo_get_name(program);
                if (!e.program)
                        goto out;
        }
-       if (domainname) {
+       if (domainname && strcmp(domainname, "any")) {
                if (!tomoyo_correct_domain(domainname)) {
                        if (!tomoyo_correct_path(domainname))
                                goto out;
@@ -196,126 +248,136 @@ static int tomoyo_update_transition_control_entry(const char *domainname,
                if (!e.domainname)
                        goto out;
        }
-       error = tomoyo_update_policy(&e.head, sizeof(e), is_delete,
-                                    &tomoyo_policy_list
-                                    [TOMOYO_ID_TRANSITION_CONTROL],
+       param->list = &param->ns->policy_list[TOMOYO_ID_TRANSITION_CONTROL];
+       error = tomoyo_update_policy(&e.head, sizeof(e), param,
                                     tomoyo_same_transition_control);
- out:
+out:
        tomoyo_put_name(e.domainname);
        tomoyo_put_name(e.program);
        return error;
 }
 
 /**
- * tomoyo_write_transition_control - Write "struct tomoyo_transition_control" list.
+ * tomoyo_scan_transition - Try to find specific domain transition type.
  *
- * @data:      String to parse.
- * @is_delete: True if it is a delete request.
- * @type:      Type of this entry.
+ * @list:       Pointer to "struct list_head".
+ * @domainname: The name of current domain.
+ * @program:    The name of requested program.
+ * @last_name:  The last component of @domainname.
+ * @type:       One of values in "enum tomoyo_transition_type".
  *
- * Returns 0 on success, negative value otherwise.
+ * Returns true if found one, false otherwise.
+ *
+ * Caller holds tomoyo_read_lock().
  */
-int tomoyo_write_transition_control(char *data, const bool is_delete,
-                                   const u8 type)
+static inline bool tomoyo_scan_transition
+(const struct list_head *list, const struct tomoyo_path_info *domainname,
+ const struct tomoyo_path_info *program, const char *last_name,
+ const enum tomoyo_transition_type type)
 {
-       char *domainname = strstr(data, " from ");
-       if (domainname) {
-               *domainname = '\0';
-               domainname += 6;
-       } else if (type == TOMOYO_TRANSITION_CONTROL_NO_KEEP ||
-                  type == TOMOYO_TRANSITION_CONTROL_KEEP) {
-               domainname = data;
-               data = NULL;
+       const struct tomoyo_transition_control *ptr;
+       list_for_each_entry_rcu(ptr, list, head.list) {
+               if (ptr->head.is_deleted || ptr->type != type)
+                       continue;
+               if (ptr->domainname) {
+                       if (!ptr->is_last_name) {
+                               if (ptr->domainname != domainname)
+                                       continue;
+                       } else {
+                               /*
+                                * Use direct strcmp() since this is
+                                * unlikely used.
+                                */
+                               if (strcmp(ptr->domainname->name, last_name))
+                                       continue;
+                       }
+               }
+               if (ptr->program && tomoyo_pathcmp(ptr->program, program))
+                       continue;
+               return true;
        }
-       return tomoyo_update_transition_control_entry(domainname, data, type,
-                                                     is_delete);
+       return false;
 }
 
 /**
  * tomoyo_transition_type - Get domain transition type.
  *
- * @domainname: The name of domain.
- * @program:    The name of program.
+ * @ns:         Pointer to "struct tomoyo_policy_namespace".
+ * @domainname: The name of current domain.
+ * @program:    The name of requested program.
  *
- * Returns TOMOYO_TRANSITION_CONTROL_INITIALIZE if executing @program
- * reinitializes domain transition, TOMOYO_TRANSITION_CONTROL_KEEP if executing
- * @program suppresses domain transition, others otherwise.
+ * Returns TOMOYO_TRANSITION_CONTROL_TRANSIT if executing @program causes
+ * domain transition across namespaces, TOMOYO_TRANSITION_CONTROL_INITIALIZE if
+ * executing @program reinitializes domain transition within that namespace,
+ * TOMOYO_TRANSITION_CONTROL_KEEP if executing @program stays at @domainname ,
+ * others otherwise.
  *
  * Caller holds tomoyo_read_lock().
  */
-static u8 tomoyo_transition_type(const struct tomoyo_path_info *domainname,
-                                const struct tomoyo_path_info *program)
+static enum tomoyo_transition_type tomoyo_transition_type
+(const struct tomoyo_policy_namespace *ns,
+ const struct tomoyo_path_info *domainname,
+ const struct tomoyo_path_info *program)
 {
-       const struct tomoyo_transition_control *ptr;
        const char *last_name = tomoyo_last_word(domainname->name);
-       u8 type;
-       for (type = 0; type < TOMOYO_MAX_TRANSITION_TYPE; type++) {
- next:
-               list_for_each_entry_rcu(ptr, &tomoyo_policy_list
-                                       [TOMOYO_ID_TRANSITION_CONTROL],
-                                       head.list) {
-                       if (ptr->head.is_deleted || ptr->type != type)
-                               continue;
-                       if (ptr->domainname) {
-                               if (!ptr->is_last_name) {
-                                       if (ptr->domainname != domainname)
-                                               continue;
-                               } else {
-                                       /*
-                                        * Use direct strcmp() since this is
-                                        * unlikely used.
-                                        */
-                                       if (strcmp(ptr->domainname->name,
-                                                  last_name))
-                                               continue;
-                               }
-                       }
-                       if (ptr->program &&
-                           tomoyo_pathcmp(ptr->program, program))
-                               continue;
-                       if (type == TOMOYO_TRANSITION_CONTROL_NO_INITIALIZE) {
-                               /*
-                                * Do not check for initialize_domain if
-                                * no_initialize_domain matched.
-                                */
-                               type = TOMOYO_TRANSITION_CONTROL_NO_KEEP;
-                               goto next;
-                       }
-                       goto done;
+       enum tomoyo_transition_type type = TOMOYO_TRANSITION_CONTROL_NO_RESET;
+       while (type < TOMOYO_MAX_TRANSITION_TYPE) {
+               const struct list_head * const list =
+                       &ns->policy_list[TOMOYO_ID_TRANSITION_CONTROL];
+               if (!tomoyo_scan_transition(list, domainname, program,
+                                           last_name, type)) {
+                       type++;
+                       continue;
                }
+               if (type != TOMOYO_TRANSITION_CONTROL_NO_RESET &&
+                   type != TOMOYO_TRANSITION_CONTROL_NO_INITIALIZE)
+                       break;
+               /*
+                * Do not check for reset_domain if no_reset_domain matched.
+                * Do not check for initialize_domain if no_initialize_domain
+                * matched.
+                */
+               type++;
+               type++;
        }
- done:
        return type;
 }
 
+/**
+ * tomoyo_same_aggregator - Check for duplicated "struct tomoyo_aggregator" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_head".
+ * @b: Pointer to "struct tomoyo_acl_head".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
 static bool tomoyo_same_aggregator(const struct tomoyo_acl_head *a,
                                   const struct tomoyo_acl_head *b)
 {
-       const struct tomoyo_aggregator *p1 = container_of(a, typeof(*p1), head);
-       const struct tomoyo_aggregator *p2 = container_of(b, typeof(*p2), head);
+       const struct tomoyo_aggregator *p1 = container_of(a, typeof(*p1),
+                                                         head);
+       const struct tomoyo_aggregator *p2 = container_of(b, typeof(*p2),
+                                                         head);
        return p1->original_name == p2->original_name &&
                p1->aggregated_name == p2->aggregated_name;
 }
 
 /**
- * tomoyo_update_aggregator_entry - Update "struct tomoyo_aggregator" list.
+ * tomoyo_write_aggregator - Write "struct tomoyo_aggregator" list.
  *
- * @original_name:   The original program's name.
- * @aggregated_name: The program name to use.
- * @is_delete:       True if it is a delete request.
+ * @param: Pointer to "struct tomoyo_acl_param".
  *
  * Returns 0 on success, negative value otherwise.
  *
  * Caller holds tomoyo_read_lock().
  */
-static int tomoyo_update_aggregator_entry(const char *original_name,
-                                         const char *aggregated_name,
-                                         const bool is_delete)
+int tomoyo_write_aggregator(struct tomoyo_acl_param *param)
 {
        struct tomoyo_aggregator e = { };
-       int error = is_delete ? -ENOENT : -ENOMEM;
-
-       if (!tomoyo_correct_path(original_name) ||
+       int error = param->is_delete ? -ENOENT : -ENOMEM;
+       const char *original_name = tomoyo_read_token(param);
+       const char *aggregated_name = tomoyo_read_token(param);
+       if (!tomoyo_correct_word(original_name) ||
            !tomoyo_correct_path(aggregated_name))
                return -EINVAL;
        e.original_name = tomoyo_get_name(original_name);
@@ -323,83 +385,181 @@ static int tomoyo_update_aggregator_entry(const char *original_name,
        if (!e.original_name || !e.aggregated_name ||
            e.aggregated_name->is_patterned) /* No patterns allowed. */
                goto out;
-       error = tomoyo_update_policy(&e.head, sizeof(e), is_delete,
-                                    &tomoyo_policy_list[TOMOYO_ID_AGGREGATOR],
+       param->list = &param->ns->policy_list[TOMOYO_ID_AGGREGATOR];
+       error = tomoyo_update_policy(&e.head, sizeof(e), param,
                                     tomoyo_same_aggregator);
- out:
+out:
        tomoyo_put_name(e.original_name);
        tomoyo_put_name(e.aggregated_name);
        return error;
 }
 
 /**
- * tomoyo_write_aggregator - Write "struct tomoyo_aggregator" list.
+ * tomoyo_find_namespace - Find specified namespace.
  *
- * @data:      String to parse.
- * @is_delete: True if it is a delete request.
+ * @name: Name of namespace to find.
+ * @len:  Length of @name.
  *
- * Returns 0 on success, negative value otherwise.
+ * Returns pointer to "struct tomoyo_policy_namespace" if found,
+ * NULL otherwise.
  *
  * Caller holds tomoyo_read_lock().
  */
-int tomoyo_write_aggregator(char *data, const bool is_delete)
+static struct tomoyo_policy_namespace *tomoyo_find_namespace
+(const char *name, const unsigned int len)
 {
-       char *cp = strchr(data, ' ');
+       struct tomoyo_policy_namespace *ns;
+       list_for_each_entry(ns, &tomoyo_namespace_list, namespace_list) {
+               if (strncmp(name, ns->name, len) ||
+                   (name[len] && name[len] != ' '))
+                       continue;
+               return ns;
+       }
+       return NULL;
+}
 
-       if (!cp)
-               return -EINVAL;
-       *cp++ = '\0';
-       return tomoyo_update_aggregator_entry(data, cp, is_delete);
+/**
+ * tomoyo_assign_namespace - Create a new namespace.
+ *
+ * @domainname: Name of namespace to create.
+ *
+ * Returns pointer to "struct tomoyo_policy_namespace" on success,
+ * NULL otherwise.
+ *
+ * Caller holds tomoyo_read_lock().
+ */
+struct tomoyo_policy_namespace *tomoyo_assign_namespace(const char *domainname)
+{
+       struct tomoyo_policy_namespace *ptr;
+       struct tomoyo_policy_namespace *entry;
+       const char *cp = domainname;
+       unsigned int len = 0;
+       while (*cp && *cp++ != ' ')
+               len++;
+       ptr = tomoyo_find_namespace(domainname, len);
+       if (ptr)
+               return ptr;
+       if (len >= TOMOYO_EXEC_TMPSIZE - 10 || !tomoyo_domain_def(domainname))
+               return NULL;
+       entry = kzalloc(sizeof(*entry) + len + 1, GFP_NOFS);
+       if (!entry)
+               return NULL;
+       if (mutex_lock_interruptible(&tomoyo_policy_lock))
+               goto out;
+       ptr = tomoyo_find_namespace(domainname, len);
+       if (!ptr && tomoyo_memory_ok(entry)) {
+               char *name = (char *) (entry + 1);
+               ptr = entry;
+               memmove(name, domainname, len);
+               name[len] = '\0';
+               entry->name = name;
+               tomoyo_init_policy_namespace(entry);
+               entry = NULL;
+       }
+       mutex_unlock(&tomoyo_policy_lock);
+out:
+       kfree(entry);
+       return ptr;
 }
 
 /**
- * tomoyo_assign_domain - Create a domain.
+ * tomoyo_namespace_jump - Check for namespace jump.
+ *
+ * @domainname: Name of domain.
+ *
+ * Returns true if namespace differs, false otherwise.
+ */
+static bool tomoyo_namespace_jump(const char *domainname)
+{
+       const char *namespace = tomoyo_current_namespace()->name;
+       const int len = strlen(namespace);
+       return strncmp(domainname, namespace, len) ||
+               (domainname[len] && domainname[len] != ' ');
+}
+
+/**
+ * tomoyo_assign_domain - Create a domain or a namespace.
  *
  * @domainname: The name of domain.
- * @profile:    Profile number to assign if the domain was newly created.
+ * @transit:    True if transit to domain found or created.
  *
  * Returns pointer to "struct tomoyo_domain_info" on success, NULL otherwise.
  *
  * Caller holds tomoyo_read_lock().
  */
 struct tomoyo_domain_info *tomoyo_assign_domain(const char *domainname,
-                                               const u8 profile)
+                                               const bool transit)
 {
-       struct tomoyo_domain_info *entry;
-       struct tomoyo_domain_info *domain = NULL;
-       const struct tomoyo_path_info *saved_domainname;
-       bool found = false;
-
-       if (!tomoyo_correct_domain(domainname))
+       struct tomoyo_domain_info e = { };
+       struct tomoyo_domain_info *entry = tomoyo_find_domain(domainname);
+       bool created = false;
+       if (entry) {
+               if (transit) {
+                       /*
+                        * Since namespace is created at runtime, profiles may
+                        * not be created by the moment the process transits to
+                        * that domain. Do not perform domain transition if
+                        * profile for that domain is not yet created.
+                        */
+                       if (!entry->ns->profile_ptr[entry->profile])
+                               return NULL;
+               }
+               return entry;
+       }
+       /* Requested domain does not exist. */
+       /* Don't create requested domain if domainname is invalid. */
+       if (strlen(domainname) >= TOMOYO_EXEC_TMPSIZE - 10 ||
+           !tomoyo_correct_domain(domainname))
+               return NULL;
+       /*
+        * Since definition of profiles and acl_groups may differ across
+        * namespaces, do not inherit "use_profile" and "use_group" settings
+        * by automatically creating requested domain upon domain transition.
+        */
+       if (transit && tomoyo_namespace_jump(domainname))
+               return NULL;
+       e.ns = tomoyo_assign_namespace(domainname);
+       if (!e.ns)
                return NULL;
-       saved_domainname = tomoyo_get_name(domainname);
-       if (!saved_domainname)
+       /*
+        * "use_profile" and "use_group" settings for automatically created
+        * domains are inherited from current domain. These are 0 for manually
+        * created domains.
+        */
+       if (transit) {
+               const struct tomoyo_domain_info *domain = tomoyo_domain();
+               e.profile = domain->profile;
+               e.group = domain->group;
+       }
+       e.domainname = tomoyo_get_name(domainname);
+       if (!e.domainname)
                return NULL;
-       entry = kzalloc(sizeof(*entry), GFP_NOFS);
        if (mutex_lock_interruptible(&tomoyo_policy_lock))
                goto out;
-       list_for_each_entry_rcu(domain, &tomoyo_domain_list, list) {
-               if (domain->is_deleted ||
-                   tomoyo_pathcmp(saved_domainname, domain->domainname))
-                       continue;
-               found = true;
-               break;
-       }
-       if (!found && tomoyo_memory_ok(entry)) {
-               INIT_LIST_HEAD(&entry->acl_info_list);
-               entry->domainname = saved_domainname;
-               saved_domainname = NULL;
-               entry->profile = profile;
-               list_add_tail_rcu(&entry->list, &tomoyo_domain_list);
-               domain = entry;
-               entry = NULL;
-               found = true;
+       entry = tomoyo_find_domain(domainname);
+       if (!entry) {
+               entry = tomoyo_commit_ok(&e, sizeof(e));
+               if (entry) {
+                       INIT_LIST_HEAD(&entry->acl_info_list);
+                       list_add_tail_rcu(&entry->list, &tomoyo_domain_list);
+                       created = true;
+               }
        }
        mutex_unlock(&tomoyo_policy_lock);
- out:
-       tomoyo_put_name(saved_domainname);
-       kfree(entry);
-       return found ? domain : NULL;
+out:
+       tomoyo_put_name(e.domainname);
+       if (entry && transit) {
+               if (created) {
+                       struct tomoyo_request_info r;
+                       tomoyo_init_request_info(&r, entry,
+                                                TOMOYO_MAC_FILE_EXECUTE);
+                       r.granted = false;
+                       tomoyo_write_log(&r, "use_profile %u\n",
+                                        entry->profile);
+                       tomoyo_write_log(&r, "use_group %u\n", entry->group);
+               }
+       }
+       return entry;
 }
 
 /**
@@ -413,22 +573,27 @@ struct tomoyo_domain_info *tomoyo_assign_domain(const char *domainname,
  */
 int tomoyo_find_next_domain(struct linux_binprm *bprm)
 {
-       struct tomoyo_request_info r;
-       char *tmp = kzalloc(TOMOYO_EXEC_TMPSIZE, GFP_NOFS);
        struct tomoyo_domain_info *old_domain = tomoyo_domain();
        struct tomoyo_domain_info *domain = NULL;
        const char *original_name = bprm->filename;
-       u8 mode;
-       bool is_enforce;
        int retval = -ENOMEM;
        bool need_kfree = false;
+       bool reject_on_transition_failure = false;
        struct tomoyo_path_info rn = { }; /* real name */
-
-       mode = tomoyo_init_request_info(&r, NULL, TOMOYO_MAC_FILE_EXECUTE);
-       is_enforce = (mode == TOMOYO_CONFIG_ENFORCING);
-       if (!tmp)
-               goto out;
-
+       struct tomoyo_execve *ee = kzalloc(sizeof(*ee), GFP_NOFS);
+       if (!ee)
+               return -ENOMEM;
+       ee->tmp = kzalloc(TOMOYO_EXEC_TMPSIZE, GFP_NOFS);
+       if (!ee->tmp) {
+               kfree(ee);
+               return -ENOMEM;
+       }
+       /* ee->dump->data is allocated by tomoyo_dump_page(). */
+       tomoyo_init_request_info(&ee->r, NULL, TOMOYO_MAC_FILE_EXECUTE);
+       ee->r.ee = ee;
+       ee->bprm = bprm;
+       ee->r.obj = &ee->obj;
+       ee->obj.path1 = bprm->file->f_path;
  retry:
        if (need_kfree) {
                kfree(rn.name);
@@ -445,8 +610,10 @@ int tomoyo_find_next_domain(struct linux_binprm *bprm)
        /* Check 'aggregator' directive. */
        {
                struct tomoyo_aggregator *ptr;
-               list_for_each_entry_rcu(ptr, &tomoyo_policy_list
-                                       [TOMOYO_ID_AGGREGATOR], head.list) {
+               struct list_head *list =
+                       &old_domain->ns->policy_list[TOMOYO_ID_AGGREGATOR];
+               /* Check 'aggregator' directive. */
+               list_for_each_entry_rcu(ptr, list, head.list) {
                        if (ptr->head.is_deleted ||
                            !tomoyo_path_matches_pattern(&rn,
                                                         ptr->original_name))
@@ -460,7 +627,7 @@ int tomoyo_find_next_domain(struct linux_binprm *bprm)
        }
 
        /* Check execute permission. */
-       retval = tomoyo_path_permission(&r, TOMOYO_TYPE_EXECUTE, &rn);
+       retval = tomoyo_path_permission(&ee->r, TOMOYO_TYPE_EXECUTE, &rn);
        if (retval == TOMOYO_RETRY_REQUEST)
                goto retry;
        if (retval < 0)
@@ -471,20 +638,30 @@ int tomoyo_find_next_domain(struct linux_binprm *bprm)
         * wildcard) rather than the pathname passed to execve()
         * (which never contains wildcard).
         */
-       if (r.param.path.matched_path) {
+       if (ee->r.param.path.matched_path) {
                if (need_kfree)
                        kfree(rn.name);
                need_kfree = false;
                /* This is OK because it is read only. */
-               rn = *r.param.path.matched_path;
+               rn = *ee->r.param.path.matched_path;
        }
 
        /* Calculate domain to transit to. */
-       switch (tomoyo_transition_type(old_domain->domainname, &rn)) {
+       switch (tomoyo_transition_type(old_domain->ns, old_domain->domainname,
+                                      &rn)) {
+       case TOMOYO_TRANSITION_CONTROL_RESET:
+               /* Transit to the root of specified namespace. */
+               snprintf(ee->tmp, TOMOYO_EXEC_TMPSIZE - 1, "<%s>", rn.name);
+               /*
+                * Make do_execve() fail if domain transition across namespaces
+                * has failed.
+                */
+               reject_on_transition_failure = true;
+               break;
        case TOMOYO_TRANSITION_CONTROL_INITIALIZE:
-               /* Transit to the child of tomoyo_kernel_domain domain. */
-               snprintf(tmp, TOMOYO_EXEC_TMPSIZE - 1, TOMOYO_ROOT_NAME " "
-                        "%s", rn.name);
+               /* Transit to the child of current namespace's root. */
+               snprintf(ee->tmp, TOMOYO_EXEC_TMPSIZE - 1, "%s %s",
+                        old_domain->ns->name, rn.name);
                break;
        case TOMOYO_TRANSITION_CONTROL_KEEP:
                /* Keep current domain. */
@@ -502,33 +679,32 @@ int tomoyo_find_next_domain(struct linux_binprm *bprm)
                        domain = old_domain;
                } else {
                        /* Normal domain transition. */
-                       snprintf(tmp, TOMOYO_EXEC_TMPSIZE - 1, "%s %s",
+                       snprintf(ee->tmp, TOMOYO_EXEC_TMPSIZE - 1, "%s %s",
                                 old_domain->domainname->name, rn.name);
                }
                break;
        }
-       if (domain || strlen(tmp) >= TOMOYO_EXEC_TMPSIZE - 10)
-               goto done;
-       domain = tomoyo_find_domain(tmp);
+       if (!domain)
+               domain = tomoyo_assign_domain(ee->tmp, true);
        if (domain)
-               goto done;
-       if (is_enforce) {
-               int error = tomoyo_supervisor(&r, "# wants to create domain\n"
-                                             "%s\n", tmp);
-               if (error == TOMOYO_RETRY_REQUEST)
-                       goto retry;
-               if (error < 0)
-                       goto done;
+               retval = 0;
+       else if (reject_on_transition_failure) {
+               printk(KERN_WARNING "ERROR: Domain '%s' not ready.\n",
+                      ee->tmp);
+               retval = -ENOMEM;
+       } else if (ee->r.mode == TOMOYO_CONFIG_ENFORCING)
+               retval = -ENOMEM;
+       else {
+               retval = 0;
+               if (!old_domain->flags[TOMOYO_DIF_TRANSITION_FAILED]) {
+                       old_domain->flags[TOMOYO_DIF_TRANSITION_FAILED] = true;
+                       ee->r.granted = false;
+                       tomoyo_write_log(&ee->r, "%s", tomoyo_dif
+                                        [TOMOYO_DIF_TRANSITION_FAILED]);
+                       printk(KERN_WARNING
+                              "ERROR: Domain '%s' not defined.\n", ee->tmp);
+               }
        }
-       domain = tomoyo_assign_domain(tmp, old_domain->profile);
- done:
-       if (domain)
-               goto out;
-       printk(KERN_WARNING "TOMOYO-ERROR: Domain '%s' not defined.\n", tmp);
-       if (is_enforce)
-               retval = -EPERM;
-       else
-               old_domain->transition_failed = true;
  out:
        if (!domain)
                domain = old_domain;
@@ -537,6 +713,54 @@ int tomoyo_find_next_domain(struct linux_binprm *bprm)
        bprm->cred->security = domain;
        if (need_kfree)
                kfree(rn.name);
-       kfree(tmp);
+       kfree(ee->tmp);
+       kfree(ee->dump.data);
+       kfree(ee);
        return retval;
 }
+
+/**
+ * tomoyo_dump_page - Dump a page to buffer.
+ *
+ * @bprm: Pointer to "struct linux_binprm".
+ * @pos:  Location to dump.
+ * @dump: Pointer to "struct tomoyo_page_dump".
+ *
+ * Returns true on success, false otherwise.
+ */
+bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos,
+                     struct tomoyo_page_dump *dump)
+{
+       struct page *page;
+       /* dump->data is released by tomoyo_finish_execve(). */
+       if (!dump->data) {
+               dump->data = kzalloc(PAGE_SIZE, GFP_NOFS);
+               if (!dump->data)
+                       return false;
+       }
+       /* Same with get_arg_page(bprm, pos, 0) in fs/exec.c */
+#ifdef CONFIG_MMU
+       if (get_user_pages(current, bprm->mm, pos, 1, 0, 1, &page, NULL) <= 0)
+               return false;
+#else
+       page = bprm->page[pos / PAGE_SIZE];
+#endif
+       if (page != dump->page) {
+               const unsigned int offset = pos % PAGE_SIZE;
+               /*
+                * Maybe kmap()/kunmap() should be used here.
+                * But remove_arg_zero() uses kmap_atomic()/kunmap_atomic().
+                * So do I.
+                */
+               char *kaddr = kmap_atomic(page, KM_USER0);
+               dump->page = page;
+               memcpy(dump->data + offset, kaddr + offset,
+                      PAGE_SIZE - offset);
+               kunmap_atomic(kaddr, KM_USER0);
+       }
+       /* Same with put_arg_page(page) in fs/exec.c */
+#ifdef CONFIG_MMU
+       put_page(page);
+#endif
+       return true;
+}
index d64e8ec..743c35f 100644 (file)
@@ -1,80 +1,51 @@
 /*
  * security/tomoyo/file.c
  *
- * Pathname restriction functions.
- *
- * Copyright (C) 2005-2010  NTT DATA CORPORATION
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
  */
 
 #include "common.h"
 #include <linux/slab.h>
 
-/* Keyword array for operations with one pathname. */
-const char *tomoyo_path_keyword[TOMOYO_MAX_PATH_OPERATION] = {
-       [TOMOYO_TYPE_READ_WRITE] = "read/write",
-       [TOMOYO_TYPE_EXECUTE]    = "execute",
-       [TOMOYO_TYPE_READ]       = "read",
-       [TOMOYO_TYPE_WRITE]      = "write",
-       [TOMOYO_TYPE_UNLINK]     = "unlink",
-       [TOMOYO_TYPE_RMDIR]      = "rmdir",
-       [TOMOYO_TYPE_TRUNCATE]   = "truncate",
-       [TOMOYO_TYPE_SYMLINK]    = "symlink",
-       [TOMOYO_TYPE_REWRITE]    = "rewrite",
-       [TOMOYO_TYPE_CHROOT]     = "chroot",
-       [TOMOYO_TYPE_UMOUNT]     = "unmount",
-};
-
-/* Keyword array for operations with one pathname and three numbers. */
-const char *tomoyo_mkdev_keyword[TOMOYO_MAX_MKDEV_OPERATION] = {
-       [TOMOYO_TYPE_MKBLOCK]    = "mkblock",
-       [TOMOYO_TYPE_MKCHAR]     = "mkchar",
-};
-
-/* Keyword array for operations with two pathnames. */
-const char *tomoyo_path2_keyword[TOMOYO_MAX_PATH2_OPERATION] = {
-       [TOMOYO_TYPE_LINK]       = "link",
-       [TOMOYO_TYPE_RENAME]     = "rename",
-       [TOMOYO_TYPE_PIVOT_ROOT] = "pivot_root",
-};
-
-/* Keyword array for operations with one pathname and one number. */
-const char *tomoyo_path_number_keyword[TOMOYO_MAX_PATH_NUMBER_OPERATION] = {
-       [TOMOYO_TYPE_CREATE]     = "create",
-       [TOMOYO_TYPE_MKDIR]      = "mkdir",
-       [TOMOYO_TYPE_MKFIFO]     = "mkfifo",
-       [TOMOYO_TYPE_MKSOCK]     = "mksock",
-       [TOMOYO_TYPE_IOCTL]      = "ioctl",
-       [TOMOYO_TYPE_CHMOD]      = "chmod",
-       [TOMOYO_TYPE_CHOWN]      = "chown",
-       [TOMOYO_TYPE_CHGRP]      = "chgrp",
-};
-
+/*
+ * Mapping table from "enum tomoyo_path_acl_index" to "enum tomoyo_mac_index".
+ */
 static const u8 tomoyo_p2mac[TOMOYO_MAX_PATH_OPERATION] = {
-       [TOMOYO_TYPE_READ_WRITE] = TOMOYO_MAC_FILE_OPEN,
        [TOMOYO_TYPE_EXECUTE]    = TOMOYO_MAC_FILE_EXECUTE,
        [TOMOYO_TYPE_READ]       = TOMOYO_MAC_FILE_OPEN,
        [TOMOYO_TYPE_WRITE]      = TOMOYO_MAC_FILE_OPEN,
+       [TOMOYO_TYPE_APPEND]     = TOMOYO_MAC_FILE_OPEN,
        [TOMOYO_TYPE_UNLINK]     = TOMOYO_MAC_FILE_UNLINK,
+       [TOMOYO_TYPE_GETATTR]    = TOMOYO_MAC_FILE_GETATTR,
        [TOMOYO_TYPE_RMDIR]      = TOMOYO_MAC_FILE_RMDIR,
        [TOMOYO_TYPE_TRUNCATE]   = TOMOYO_MAC_FILE_TRUNCATE,
        [TOMOYO_TYPE_SYMLINK]    = TOMOYO_MAC_FILE_SYMLINK,
-       [TOMOYO_TYPE_REWRITE]    = TOMOYO_MAC_FILE_REWRITE,
        [TOMOYO_TYPE_CHROOT]     = TOMOYO_MAC_FILE_CHROOT,
        [TOMOYO_TYPE_UMOUNT]     = TOMOYO_MAC_FILE_UMOUNT,
 };
 
-static const u8 tomoyo_pnnn2mac[TOMOYO_MAX_MKDEV_OPERATION] = {
+/*
+ * Mapping table from "enum tomoyo_mkdev_acl_index" to "enum tomoyo_mac_index".
+ */
+const u8 tomoyo_pnnn2mac[TOMOYO_MAX_MKDEV_OPERATION] = {
        [TOMOYO_TYPE_MKBLOCK] = TOMOYO_MAC_FILE_MKBLOCK,
        [TOMOYO_TYPE_MKCHAR]  = TOMOYO_MAC_FILE_MKCHAR,
 };
 
-static const u8 tomoyo_pp2mac[TOMOYO_MAX_PATH2_OPERATION] = {
+/*
+ * Mapping table from "enum tomoyo_path2_acl_index" to "enum tomoyo_mac_index".
+ */
+const u8 tomoyo_pp2mac[TOMOYO_MAX_PATH2_OPERATION] = {
        [TOMOYO_TYPE_LINK]       = TOMOYO_MAC_FILE_LINK,
        [TOMOYO_TYPE_RENAME]     = TOMOYO_MAC_FILE_RENAME,
        [TOMOYO_TYPE_PIVOT_ROOT] = TOMOYO_MAC_FILE_PIVOT_ROOT,
 };
 
-static const u8 tomoyo_pn2mac[TOMOYO_MAX_PATH_NUMBER_OPERATION] = {
+/*
+ * Mapping table from "enum tomoyo_path_number_acl_index" to
+ * "enum tomoyo_mac_index".
+ */
+const u8 tomoyo_pn2mac[TOMOYO_MAX_PATH_NUMBER_OPERATION] = {
        [TOMOYO_TYPE_CREATE] = TOMOYO_MAC_FILE_CREATE,
        [TOMOYO_TYPE_MKDIR]  = TOMOYO_MAC_FILE_MKDIR,
        [TOMOYO_TYPE_MKFIFO] = TOMOYO_MAC_FILE_MKFIFO,
@@ -85,41 +56,76 @@ static const u8 tomoyo_pn2mac[TOMOYO_MAX_PATH_NUMBER_OPERATION] = {
        [TOMOYO_TYPE_CHGRP]  = TOMOYO_MAC_FILE_CHGRP,
 };
 
+/**
+ * tomoyo_put_name_union - Drop reference on "struct tomoyo_name_union".
+ *
+ * @ptr: Pointer to "struct tomoyo_name_union".
+ *
+ * Returns nothing.
+ */
 void tomoyo_put_name_union(struct tomoyo_name_union *ptr)
 {
-       if (!ptr)
-               return;
-       if (ptr->is_group)
-               tomoyo_put_group(ptr->group);
-       else
-               tomoyo_put_name(ptr->filename);
+       tomoyo_put_group(ptr->group);
+       tomoyo_put_name(ptr->filename);
 }
 
+/**
+ * tomoyo_compare_name_union - Check whether a name matches "struct tomoyo_name_union" or not.
+ *
+ * @name: Pointer to "struct tomoyo_path_info".
+ * @ptr:  Pointer to "struct tomoyo_name_union".
+ *
+ * Returns "struct tomoyo_path_info" if @name matches @ptr, NULL otherwise.
+ */
 const struct tomoyo_path_info *
 tomoyo_compare_name_union(const struct tomoyo_path_info *name,
                          const struct tomoyo_name_union *ptr)
 {
-       if (ptr->is_group)
+       if (ptr->group)
                return tomoyo_path_matches_group(name, ptr->group);
        if (tomoyo_path_matches_pattern(name, ptr->filename))
                return ptr->filename;
        return NULL;
 }
 
+/**
+ * tomoyo_put_number_union - Drop reference on "struct tomoyo_number_union".
+ *
+ * @ptr: Pointer to "struct tomoyo_number_union".
+ *
+ * Returns nothing.
+ */
 void tomoyo_put_number_union(struct tomoyo_number_union *ptr)
 {
-       if (ptr && ptr->is_group)
-               tomoyo_put_group(ptr->group);
+       tomoyo_put_group(ptr->group);
 }
 
+/**
+ * tomoyo_compare_number_union - Check whether a value matches "struct tomoyo_number_union" or not.
+ *
+ * @value: Number to check.
+ * @ptr:   Pointer to "struct tomoyo_number_union".
+ *
+ * Returns true if @value matches @ptr, false otherwise.
+ */
 bool tomoyo_compare_number_union(const unsigned long value,
                                 const struct tomoyo_number_union *ptr)
 {
-       if (ptr->is_group)
+       if (ptr->group)
                return tomoyo_number_matches_group(value, value, ptr->group);
        return value >= ptr->values[0] && value <= ptr->values[1];
 }
 
+/**
+ * tomoyo_add_slash - Add trailing '/' if needed.
+ *
+ * @buf: Pointer to "struct tomoyo_path_info".
+ *
+ * Returns nothing.
+ *
+ * @buf must be generated by tomoyo_encode() because this function does not
+ * allocate memory for adding '/'.
+ */
 static void tomoyo_add_slash(struct tomoyo_path_info *buf)
 {
        if (buf->is_dir)
@@ -132,24 +138,6 @@ static void tomoyo_add_slash(struct tomoyo_path_info *buf)
 }
 
 /**
- * tomoyo_strendswith - Check whether the token ends with the given token.
- *
- * @name: The token to check.
- * @tail: The token to find.
- *
- * Returns true if @name ends with @tail, false otherwise.
- */
-static bool tomoyo_strendswith(const char *name, const char *tail)
-{
-       int len;
-
-       if (!name || !tail)
-               return false;
-       len = strlen(name) - strlen(tail);
-       return len >= 0 && !strcmp(name + len, tail);
-}
-
-/**
  * tomoyo_get_realpath - Get realpath.
  *
  * @buf:  Pointer to "struct tomoyo_path_info".
@@ -164,7 +152,7 @@ static bool tomoyo_get_realpath(struct tomoyo_path_info *buf, struct path *path)
                tomoyo_fill_path_info(buf);
                return true;
        }
-        return false;
+       return false;
 }
 
 /**
@@ -176,13 +164,9 @@ static bool tomoyo_get_realpath(struct tomoyo_path_info *buf, struct path *path)
  */
 static int tomoyo_audit_path_log(struct tomoyo_request_info *r)
 {
-       const char *operation = tomoyo_path_keyword[r->param.path.operation];
-       const struct tomoyo_path_info *filename = r->param.path.filename;
-       if (r->granted)
-               return 0;
-       tomoyo_warn_log(r, "%s %s", operation, filename->name);
-       return tomoyo_supervisor(r, "allow_%s %s\n", operation,
-                                tomoyo_pattern(filename));
+       return tomoyo_supervisor(r, "file %s %s\n", tomoyo_path_keyword
+                                [r->param.path.operation],
+                                r->param.path.filename->name);
 }
 
 /**
@@ -194,16 +178,10 @@ static int tomoyo_audit_path_log(struct tomoyo_request_info *r)
  */
 static int tomoyo_audit_path2_log(struct tomoyo_request_info *r)
 {
-       const char *operation = tomoyo_path2_keyword[r->param.path2.operation];
-       const struct tomoyo_path_info *filename1 = r->param.path2.filename1;
-       const struct tomoyo_path_info *filename2 = r->param.path2.filename2;
-       if (r->granted)
-               return 0;
-       tomoyo_warn_log(r, "%s %s %s", operation, filename1->name,
-                       filename2->name);
-       return tomoyo_supervisor(r, "allow_%s %s %s\n", operation,
-                                tomoyo_pattern(filename1),
-                                tomoyo_pattern(filename2));
+       return tomoyo_supervisor(r, "file %s %s %s\n", tomoyo_mac_keywords
+                                [tomoyo_pp2mac[r->param.path2.operation]],
+                                r->param.path2.filename1->name,
+                                r->param.path2.filename2->name);
 }
 
 /**
@@ -215,24 +193,18 @@ static int tomoyo_audit_path2_log(struct tomoyo_request_info *r)
  */
 static int tomoyo_audit_mkdev_log(struct tomoyo_request_info *r)
 {
-       const char *operation = tomoyo_mkdev_keyword[r->param.mkdev.operation];
-       const struct tomoyo_path_info *filename = r->param.mkdev.filename;
-       const unsigned int major = r->param.mkdev.major;
-       const unsigned int minor = r->param.mkdev.minor;
-       const unsigned int mode = r->param.mkdev.mode;
-       if (r->granted)
-               return 0;
-       tomoyo_warn_log(r, "%s %s 0%o %u %u", operation, filename->name, mode,
-                       major, minor);
-       return tomoyo_supervisor(r, "allow_%s %s 0%o %u %u\n", operation,
-                                tomoyo_pattern(filename), mode, major, minor);
+       return tomoyo_supervisor(r, "file %s %s 0%o %u %u\n",
+                                tomoyo_mac_keywords
+                                [tomoyo_pnnn2mac[r->param.mkdev.operation]],
+                                r->param.mkdev.filename->name,
+                                r->param.mkdev.mode, r->param.mkdev.major,
+                                r->param.mkdev.minor);
 }
 
 /**
  * tomoyo_audit_path_number_log - Audit path/number request log.
  *
- * @r:     Pointer to "struct tomoyo_request_info".
- * @error: Error code.
+ * @r: Pointer to "struct tomoyo_request_info".
  *
  * Returns 0 on success, negative value otherwise.
  */
@@ -240,11 +212,7 @@ static int tomoyo_audit_path_number_log(struct tomoyo_request_info *r)
 {
        const u8 type = r->param.path_number.operation;
        u8 radix;
-       const struct tomoyo_path_info *filename = r->param.path_number.filename;
-       const char *operation = tomoyo_path_number_keyword[type];
        char buffer[64];
-       if (r->granted)
-               return 0;
        switch (type) {
        case TOMOYO_TYPE_CREATE:
        case TOMOYO_TYPE_MKDIR:
@@ -262,251 +230,23 @@ static int tomoyo_audit_path_number_log(struct tomoyo_request_info *r)
        }
        tomoyo_print_ulong(buffer, sizeof(buffer), r->param.path_number.number,
                           radix);
-       tomoyo_warn_log(r, "%s %s %s", operation, filename->name, buffer);
-       return tomoyo_supervisor(r, "allow_%s %s %s\n", operation,
-                                tomoyo_pattern(filename), buffer);
-}
-
-static bool tomoyo_same_globally_readable(const struct tomoyo_acl_head *a,
-                                         const struct tomoyo_acl_head *b)
-{
-       return container_of(a, struct tomoyo_readable_file,
-                           head)->filename ==
-               container_of(b, struct tomoyo_readable_file,
-                            head)->filename;
-}
-
-/**
- * tomoyo_update_globally_readable_entry - Update "struct tomoyo_readable_file" list.
- *
- * @filename:  Filename unconditionally permitted to open() for reading.
- * @is_delete: True if it is a delete request.
- *
- * Returns 0 on success, negative value otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-static int tomoyo_update_globally_readable_entry(const char *filename,
-                                                const bool is_delete)
-{
-       struct tomoyo_readable_file e = { };
-       int error;
-
-       if (!tomoyo_correct_word(filename))
-               return -EINVAL;
-       e.filename = tomoyo_get_name(filename);
-       if (!e.filename)
-               return -ENOMEM;
-       error = tomoyo_update_policy(&e.head, sizeof(e), is_delete,
-                                    &tomoyo_policy_list
-                                    [TOMOYO_ID_GLOBALLY_READABLE],
-                                    tomoyo_same_globally_readable);
-       tomoyo_put_name(e.filename);
-       return error;
-}
-
-/**
- * tomoyo_globally_readable_file - Check if the file is unconditionnaly permitted to be open()ed for reading.
- *
- * @filename: The filename to check.
- *
- * Returns true if any domain can open @filename for reading, false otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-static bool tomoyo_globally_readable_file(const struct tomoyo_path_info *
-                                            filename)
-{
-       struct tomoyo_readable_file *ptr;
-       bool found = false;
-
-       list_for_each_entry_rcu(ptr, &tomoyo_policy_list
-                               [TOMOYO_ID_GLOBALLY_READABLE], head.list) {
-               if (!ptr->head.is_deleted &&
-                   tomoyo_path_matches_pattern(filename, ptr->filename)) {
-                       found = true;
-                       break;
-               }
-       }
-       return found;
-}
-
-/**
- * tomoyo_write_globally_readable - Write "struct tomoyo_readable_file" list.
- *
- * @data:      String to parse.
- * @is_delete: True if it is a delete request.
- *
- * Returns 0 on success, negative value otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-int tomoyo_write_globally_readable(char *data, const bool is_delete)
-{
-       return tomoyo_update_globally_readable_entry(data, is_delete);
-}
-
-static bool tomoyo_same_pattern(const struct tomoyo_acl_head *a,
-                               const struct tomoyo_acl_head *b)
-{
-       return container_of(a, struct tomoyo_no_pattern, head)->pattern ==
-               container_of(b, struct tomoyo_no_pattern, head)->pattern;
-}
-
-/**
- * tomoyo_update_file_pattern_entry - Update "struct tomoyo_no_pattern" list.
- *
- * @pattern:   Pathname pattern.
- * @is_delete: True if it is a delete request.
- *
- * Returns 0 on success, negative value otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-static int tomoyo_update_file_pattern_entry(const char *pattern,
-                                           const bool is_delete)
-{
-       struct tomoyo_no_pattern e = { };
-       int error;
-
-       if (!tomoyo_correct_word(pattern))
-               return -EINVAL;
-       e.pattern = tomoyo_get_name(pattern);
-       if (!e.pattern)
-               return -ENOMEM;
-       error = tomoyo_update_policy(&e.head, sizeof(e), is_delete,
-                                    &tomoyo_policy_list[TOMOYO_ID_PATTERN],
-                                    tomoyo_same_pattern);
-       tomoyo_put_name(e.pattern);
-       return error;
-}
-
-/**
- * tomoyo_pattern - Get patterned pathname.
- *
- * @filename: The filename to find patterned pathname.
- *
- * Returns pointer to pathname pattern if matched, @filename otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-const char *tomoyo_pattern(const struct tomoyo_path_info *filename)
-{
-       struct tomoyo_no_pattern *ptr;
-       const struct tomoyo_path_info *pattern = NULL;
-
-       list_for_each_entry_rcu(ptr, &tomoyo_policy_list[TOMOYO_ID_PATTERN],
-                               head.list) {
-               if (ptr->head.is_deleted)
-                       continue;
-               if (!tomoyo_path_matches_pattern(filename, ptr->pattern))
-                       continue;
-               pattern = ptr->pattern;
-               if (tomoyo_strendswith(pattern->name, "/\\*")) {
-                       /* Do nothing. Try to find the better match. */
-               } else {
-                       /* This would be the better match. Use this. */
-                       break;
-               }
-       }
-       if (pattern)
-               filename = pattern;
-       return filename->name;
-}
-
-/**
- * tomoyo_write_pattern - Write "struct tomoyo_no_pattern" list.
- *
- * @data:      String to parse.
- * @is_delete: True if it is a delete request.
- *
- * Returns 0 on success, negative value otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-int tomoyo_write_pattern(char *data, const bool is_delete)
-{
-       return tomoyo_update_file_pattern_entry(data, is_delete);
-}
-
-static bool tomoyo_same_no_rewrite(const struct tomoyo_acl_head *a,
-                                  const struct tomoyo_acl_head *b)
-{
-       return container_of(a, struct tomoyo_no_rewrite, head)->pattern
-               == container_of(b, struct tomoyo_no_rewrite, head)
-               ->pattern;
-}
-
-/**
- * tomoyo_update_no_rewrite_entry - Update "struct tomoyo_no_rewrite" list.
- *
- * @pattern:   Pathname pattern that are not rewritable by default.
- * @is_delete: True if it is a delete request.
- *
- * Returns 0 on success, negative value otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-static int tomoyo_update_no_rewrite_entry(const char *pattern,
-                                         const bool is_delete)
-{
-       struct tomoyo_no_rewrite e = { };
-       int error;
-
-       if (!tomoyo_correct_word(pattern))
-               return -EINVAL;
-       e.pattern = tomoyo_get_name(pattern);
-       if (!e.pattern)
-               return -ENOMEM;
-       error = tomoyo_update_policy(&e.head, sizeof(e), is_delete,
-                                    &tomoyo_policy_list[TOMOYO_ID_NO_REWRITE],
-                                    tomoyo_same_no_rewrite);
-       tomoyo_put_name(e.pattern);
-       return error;
+       return tomoyo_supervisor(r, "file %s %s %s\n", tomoyo_mac_keywords
+                                [tomoyo_pn2mac[type]],
+                                r->param.path_number.filename->name, buffer);
 }
 
 /**
- * tomoyo_no_rewrite_file - Check if the given pathname is not permitted to be rewrited.
+ * tomoyo_check_path_acl - Check permission for path operation.
  *
- * @filename: Filename to check.
+ * @r:   Pointer to "struct tomoyo_request_info".
+ * @ptr: Pointer to "struct tomoyo_acl_info".
  *
- * Returns true if @filename is specified by "deny_rewrite" directive,
- * false otherwise.
+ * Returns true if granted, false otherwise.
  *
- * Caller holds tomoyo_read_lock().
+ * To be able to use wildcard for domain transition, this function sets
+ * matching entry on success. Since the caller holds tomoyo_read_lock(),
+ * it is safe to set matching entry.
  */
-static bool tomoyo_no_rewrite_file(const struct tomoyo_path_info *filename)
-{
-       struct tomoyo_no_rewrite *ptr;
-       bool found = false;
-
-       list_for_each_entry_rcu(ptr, &tomoyo_policy_list[TOMOYO_ID_NO_REWRITE],
-                               head.list) {
-               if (ptr->head.is_deleted)
-                       continue;
-               if (!tomoyo_path_matches_pattern(filename, ptr->pattern))
-                       continue;
-               found = true;
-               break;
-       }
-       return found;
-}
-
-/**
- * tomoyo_write_no_rewrite - Write "struct tomoyo_no_rewrite" list.
- *
- * @data:      String to parse.
- * @is_delete: True if it is a delete request.
- *
- * Returns 0 on success, negative value otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-int tomoyo_write_no_rewrite(char *data, const bool is_delete)
-{
-       return tomoyo_update_no_rewrite_entry(data, is_delete);
-}
-
 static bool tomoyo_check_path_acl(struct tomoyo_request_info *r,
                                  const struct tomoyo_acl_info *ptr)
 {
@@ -521,6 +261,14 @@ static bool tomoyo_check_path_acl(struct tomoyo_request_info *r,
        return false;
 }
 
+/**
+ * tomoyo_check_path_number_acl - Check permission for path number operation.
+ *
+ * @r:   Pointer to "struct tomoyo_request_info".
+ * @ptr: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if granted, false otherwise.
+ */
 static bool tomoyo_check_path_number_acl(struct tomoyo_request_info *r,
                                         const struct tomoyo_acl_info *ptr)
 {
@@ -533,6 +281,14 @@ static bool tomoyo_check_path_number_acl(struct tomoyo_request_info *r,
                                          &acl->name);
 }
 
+/**
+ * tomoyo_check_path2_acl - Check permission for path path operation.
+ *
+ * @r:   Pointer to "struct tomoyo_request_info".
+ * @ptr: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if granted, false otherwise.
+ */
 static bool tomoyo_check_path2_acl(struct tomoyo_request_info *r,
                                   const struct tomoyo_acl_info *ptr)
 {
@@ -544,8 +300,16 @@ static bool tomoyo_check_path2_acl(struct tomoyo_request_info *r,
                                             &acl->name2);
 }
 
+/**
+ * tomoyo_check_mkdev_acl - Check permission for path number number number operation.
+ *
+ * @r:   Pointer to "struct tomoyo_request_info".
+ * @ptr: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if granted, false otherwise.
+ */
 static bool tomoyo_check_mkdev_acl(struct tomoyo_request_info *r,
-                               const struct tomoyo_acl_info *ptr)
+                                  const struct tomoyo_acl_info *ptr)
 {
        const struct tomoyo_mkdev_acl *acl =
                container_of(ptr, typeof(*acl), head);
@@ -560,15 +324,31 @@ static bool tomoyo_check_mkdev_acl(struct tomoyo_request_info *r,
                                          &acl->name);
 }
 
+/**
+ * tomoyo_same_path_acl - Check for duplicated "struct tomoyo_path_acl" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_info".
+ * @b: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if @a == @b except permission bits, false otherwise.
+ */
 static bool tomoyo_same_path_acl(const struct tomoyo_acl_info *a,
                                 const struct tomoyo_acl_info *b)
 {
        const struct tomoyo_path_acl *p1 = container_of(a, typeof(*p1), head);
        const struct tomoyo_path_acl *p2 = container_of(b, typeof(*p2), head);
-       return tomoyo_same_acl_head(&p1->head, &p2->head) &&
-               tomoyo_same_name_union(&p1->name, &p2->name);
+       return tomoyo_same_name_union(&p1->name, &p2->name);
 }
 
+/**
+ * tomoyo_merge_path_acl - Merge duplicated "struct tomoyo_path_acl" entry.
+ *
+ * @a:         Pointer to "struct tomoyo_acl_info".
+ * @b:         Pointer to "struct tomoyo_acl_info".
+ * @is_delete: True for @a &= ~@b, false for @a |= @b.
+ *
+ * Returns true if @a is empty, false otherwise.
+ */
 static bool tomoyo_merge_path_acl(struct tomoyo_acl_info *a,
                                  struct tomoyo_acl_info *b,
                                  const bool is_delete)
@@ -577,19 +357,10 @@ static bool tomoyo_merge_path_acl(struct tomoyo_acl_info *a,
                ->perm;
        u16 perm = *a_perm;
        const u16 b_perm = container_of(b, struct tomoyo_path_acl, head)->perm;
-       if (is_delete) {
+       if (is_delete)
                perm &= ~b_perm;
-               if ((perm & TOMOYO_RW_MASK) != TOMOYO_RW_MASK)
-                       perm &= ~(1 << TOMOYO_TYPE_READ_WRITE);
-               else if (!(perm & (1 << TOMOYO_TYPE_READ_WRITE)))
-                       perm &= ~TOMOYO_RW_MASK;
-       } else {
+       else
                perm |= b_perm;
-               if ((perm & TOMOYO_RW_MASK) == TOMOYO_RW_MASK)
-                       perm |= (1 << TOMOYO_TYPE_READ_WRITE);
-               else if (perm & (1 << TOMOYO_TYPE_READ_WRITE))
-                       perm |= TOMOYO_RW_MASK;
-       }
        *a_perm = perm;
        return !perm;
 }
@@ -597,52 +368,62 @@ static bool tomoyo_merge_path_acl(struct tomoyo_acl_info *a,
 /**
  * tomoyo_update_path_acl - Update "struct tomoyo_path_acl" list.
  *
- * @type:      Type of operation.
- * @filename:  Filename.
- * @domain:    Pointer to "struct tomoyo_domain_info".
- * @is_delete: True if it is a delete request.
+ * @perm:  Permission.
+ * @param: Pointer to "struct tomoyo_acl_param".
  *
  * Returns 0 on success, negative value otherwise.
  *
  * Caller holds tomoyo_read_lock().
  */
-static int tomoyo_update_path_acl(const u8 type, const char *filename,
-                                 struct tomoyo_domain_info * const domain,
-                                 const bool is_delete)
+static int tomoyo_update_path_acl(const u16 perm,
+                                 struct tomoyo_acl_param *param)
 {
        struct tomoyo_path_acl e = {
                .head.type = TOMOYO_TYPE_PATH_ACL,
-               .perm = 1 << type
+               .perm = perm
        };
        int error;
-       if (e.perm == (1 << TOMOYO_TYPE_READ_WRITE))
-               e.perm |= TOMOYO_RW_MASK;
-       if (!tomoyo_parse_name_union(filename, &e.name))
-               return -EINVAL;
-       error = tomoyo_update_domain(&e.head, sizeof(e), is_delete, domain,
-                                    tomoyo_same_path_acl,
-                                    tomoyo_merge_path_acl);
+       if (!tomoyo_parse_name_union(param, &e.name))
+               error = -EINVAL;
+       else
+               error = tomoyo_update_domain(&e.head, sizeof(e), param,
+                                            tomoyo_same_path_acl,
+                                            tomoyo_merge_path_acl);
        tomoyo_put_name_union(&e.name);
        return error;
 }
 
+/**
+ * tomoyo_same_mkdev_acl - Check for duplicated "struct tomoyo_mkdev_acl" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_info".
+ * @b: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if @a == @b except permission bits, false otherwise.
+ */
 static bool tomoyo_same_mkdev_acl(const struct tomoyo_acl_info *a,
                                         const struct tomoyo_acl_info *b)
 {
-       const struct tomoyo_mkdev_acl *p1 = container_of(a, typeof(*p1),
-                                                               head);
-       const struct tomoyo_mkdev_acl *p2 = container_of(b, typeof(*p2),
-                                                               head);
-       return tomoyo_same_acl_head(&p1->head, &p2->head)
-               && tomoyo_same_name_union(&p1->name, &p2->name)
-               && tomoyo_same_number_union(&p1->mode, &p2->mode)
-               && tomoyo_same_number_union(&p1->major, &p2->major)
-               && tomoyo_same_number_union(&p1->minor, &p2->minor);
+       const struct tomoyo_mkdev_acl *p1 = container_of(a, typeof(*p1), head);
+       const struct tomoyo_mkdev_acl *p2 = container_of(b, typeof(*p2), head);
+       return tomoyo_same_name_union(&p1->name, &p2->name) &&
+               tomoyo_same_number_union(&p1->mode, &p2->mode) &&
+               tomoyo_same_number_union(&p1->major, &p2->major) &&
+               tomoyo_same_number_union(&p1->minor, &p2->minor);
 }
 
+/**
+ * tomoyo_merge_mkdev_acl - Merge duplicated "struct tomoyo_mkdev_acl" entry.
+ *
+ * @a:         Pointer to "struct tomoyo_acl_info".
+ * @b:         Pointer to "struct tomoyo_acl_info".
+ * @is_delete: True for @a &= ~@b, false for @a |= @b.
+ *
+ * Returns true if @a is empty, false otherwise.
+ */
 static bool tomoyo_merge_mkdev_acl(struct tomoyo_acl_info *a,
-                                         struct tomoyo_acl_info *b,
-                                         const bool is_delete)
+                                  struct tomoyo_acl_info *b,
+                                  const bool is_delete)
 {
        u8 *const a_perm = &container_of(a, struct tomoyo_mkdev_acl,
                                         head)->perm;
@@ -660,37 +441,30 @@ static bool tomoyo_merge_mkdev_acl(struct tomoyo_acl_info *a,
 /**
  * tomoyo_update_mkdev_acl - Update "struct tomoyo_mkdev_acl" list.
  *
- * @type:      Type of operation.
- * @filename:  Filename.
- * @mode:      Create mode.
- * @major:     Device major number.
- * @minor:     Device minor number.
- * @domain:    Pointer to "struct tomoyo_domain_info".
- * @is_delete: True if it is a delete request.
+ * @perm:  Permission.
+ * @param: Pointer to "struct tomoyo_acl_param".
  *
  * Returns 0 on success, negative value otherwise.
  *
  * Caller holds tomoyo_read_lock().
  */
-static int tomoyo_update_mkdev_acl(const u8 type, const char *filename,
-                                         char *mode, char *major, char *minor,
-                                         struct tomoyo_domain_info * const
-                                         domain, const bool is_delete)
+static int tomoyo_update_mkdev_acl(const u8 perm,
+                                  struct tomoyo_acl_param *param)
 {
        struct tomoyo_mkdev_acl e = {
                .head.type = TOMOYO_TYPE_MKDEV_ACL,
-               .perm = 1 << type
+               .perm = perm
        };
-       int error = is_delete ? -ENOENT : -ENOMEM;
-       if (!tomoyo_parse_name_union(filename, &e.name) ||
-           !tomoyo_parse_number_union(mode, &e.mode) ||
-           !tomoyo_parse_number_union(major, &e.major) ||
-           !tomoyo_parse_number_union(minor, &e.minor))
-               goto out;
-       error = tomoyo_update_domain(&e.head, sizeof(e), is_delete, domain,
-                                    tomoyo_same_mkdev_acl,
-                                    tomoyo_merge_mkdev_acl);
- out:
+       int error;
+       if (!tomoyo_parse_name_union(param, &e.name) ||
+           !tomoyo_parse_number_union(param, &e.mode) ||
+           !tomoyo_parse_number_union(param, &e.major) ||
+           !tomoyo_parse_number_union(param, &e.minor))
+               error = -EINVAL;
+       else
+               error = tomoyo_update_domain(&e.head, sizeof(e), param,
+                                            tomoyo_same_mkdev_acl,
+                                            tomoyo_merge_mkdev_acl);
        tomoyo_put_name_union(&e.name);
        tomoyo_put_number_union(&e.mode);
        tomoyo_put_number_union(&e.major);
@@ -698,16 +472,32 @@ static int tomoyo_update_mkdev_acl(const u8 type, const char *filename,
        return error;
 }
 
+/**
+ * tomoyo_same_path2_acl - Check for duplicated "struct tomoyo_path2_acl" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_info".
+ * @b: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if @a == @b except permission bits, false otherwise.
+ */
 static bool tomoyo_same_path2_acl(const struct tomoyo_acl_info *a,
                                  const struct tomoyo_acl_info *b)
 {
        const struct tomoyo_path2_acl *p1 = container_of(a, typeof(*p1), head);
        const struct tomoyo_path2_acl *p2 = container_of(b, typeof(*p2), head);
-       return tomoyo_same_acl_head(&p1->head, &p2->head)
-               && tomoyo_same_name_union(&p1->name1, &p2->name1)
-               && tomoyo_same_name_union(&p1->name2, &p2->name2);
+       return tomoyo_same_name_union(&p1->name1, &p2->name1) &&
+               tomoyo_same_name_union(&p1->name2, &p2->name2);
 }
 
+/**
+ * tomoyo_merge_path2_acl - Merge duplicated "struct tomoyo_path2_acl" entry.
+ *
+ * @a:         Pointer to "struct tomoyo_acl_info".
+ * @b:         Pointer to "struct tomoyo_acl_info".
+ * @is_delete: True for @a &= ~@b, false for @a |= @b.
+ *
+ * Returns true if @a is empty, false otherwise.
+ */
 static bool tomoyo_merge_path2_acl(struct tomoyo_acl_info *a,
                                   struct tomoyo_acl_info *b,
                                   const bool is_delete)
@@ -727,33 +517,28 @@ static bool tomoyo_merge_path2_acl(struct tomoyo_acl_info *a,
 /**
  * tomoyo_update_path2_acl - Update "struct tomoyo_path2_acl" list.
  *
- * @type:      Type of operation.
- * @filename1: First filename.
- * @filename2: Second filename.
- * @domain:    Pointer to "struct tomoyo_domain_info".
- * @is_delete: True if it is a delete request.
+ * @perm:  Permission.
+ * @param: Pointer to "struct tomoyo_acl_param".
  *
  * Returns 0 on success, negative value otherwise.
  *
  * Caller holds tomoyo_read_lock().
  */
-static int tomoyo_update_path2_acl(const u8 type, const char *filename1,
-                                  const char *filename2,
-                                  struct tomoyo_domain_info * const domain,
-                                  const bool is_delete)
+static int tomoyo_update_path2_acl(const u8 perm,
+                                  struct tomoyo_acl_param *param)
 {
        struct tomoyo_path2_acl e = {
                .head.type = TOMOYO_TYPE_PATH2_ACL,
-               .perm = 1 << type
+               .perm = perm
        };
-       int error = is_delete ? -ENOENT : -ENOMEM;
-       if (!tomoyo_parse_name_union(filename1, &e.name1) ||
-           !tomoyo_parse_name_union(filename2, &e.name2))
-               goto out;
-       error = tomoyo_update_domain(&e.head, sizeof(e), is_delete, domain,
-                                    tomoyo_same_path2_acl,
-                                    tomoyo_merge_path2_acl);
- out:
+       int error;
+       if (!tomoyo_parse_name_union(param, &e.name1) ||
+           !tomoyo_parse_name_union(param, &e.name2))
+               error = -EINVAL;
+       else
+               error = tomoyo_update_domain(&e.head, sizeof(e), param,
+                                            tomoyo_same_path2_acl,
+                                            tomoyo_merge_path2_acl);
        tomoyo_put_name_union(&e.name1);
        tomoyo_put_name_union(&e.name2);
        return error;
@@ -775,9 +560,8 @@ int tomoyo_path_permission(struct tomoyo_request_info *r, u8 operation,
 {
        int error;
 
- next:
        r->type = tomoyo_p2mac[operation];
-       r->mode = tomoyo_get_mode(r->profile, r->type);
+       r->mode = tomoyo_get_mode(r->domain->ns, r->profile, r->type);
        if (r->mode == TOMOYO_CONFIG_DISABLED)
                return 0;
        r->param_type = TOMOYO_TYPE_PATH_ACL;
@@ -785,10 +569,6 @@ int tomoyo_path_permission(struct tomoyo_request_info *r, u8 operation,
        r->param.path.operation = operation;
        do {
                tomoyo_check_acl(r, tomoyo_check_path_acl);
-               if (!r->granted && operation == TOMOYO_TYPE_READ &&
-                   !r->domain->ignore_global_allow_read &&
-                   tomoyo_globally_readable_file(filename))
-                       r->granted = true;
                error = tomoyo_audit_path_log(r);
                /*
                 * Do not retry for execute request, for alias may have
@@ -796,19 +576,17 @@ int tomoyo_path_permission(struct tomoyo_request_info *r, u8 operation,
                 */
        } while (error == TOMOYO_RETRY_REQUEST &&
                 operation != TOMOYO_TYPE_EXECUTE);
-       /*
-        * Since "allow_truncate" doesn't imply "allow_rewrite" permission,
-        * we need to check "allow_rewrite" permission if the filename is
-        * specified by "deny_rewrite" keyword.
-        */
-       if (!error && operation == TOMOYO_TYPE_TRUNCATE &&
-           tomoyo_no_rewrite_file(filename)) {
-               operation = TOMOYO_TYPE_REWRITE;
-               goto next;
-       }
        return error;
 }
 
+/**
+ * tomoyo_same_path_number_acl - Check for duplicated "struct tomoyo_path_number_acl" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_info".
+ * @b: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if @a == @b except permission bits, false otherwise.
+ */
 static bool tomoyo_same_path_number_acl(const struct tomoyo_acl_info *a,
                                        const struct tomoyo_acl_info *b)
 {
@@ -816,11 +594,19 @@ static bool tomoyo_same_path_number_acl(const struct tomoyo_acl_info *a,
                                                               head);
        const struct tomoyo_path_number_acl *p2 = container_of(b, typeof(*p2),
                                                               head);
-       return tomoyo_same_acl_head(&p1->head, &p2->head)
-               && tomoyo_same_name_union(&p1->name, &p2->name)
-               && tomoyo_same_number_union(&p1->number, &p2->number);
+       return tomoyo_same_name_union(&p1->name, &p2->name) &&
+               tomoyo_same_number_union(&p1->number, &p2->number);
 }
 
+/**
+ * tomoyo_merge_path_number_acl - Merge duplicated "struct tomoyo_path_number_acl" entry.
+ *
+ * @a:         Pointer to "struct tomoyo_acl_info".
+ * @b:         Pointer to "struct tomoyo_acl_info".
+ * @is_delete: True for @a &= ~@b, false for @a |= @b.
+ *
+ * Returns true if @a is empty, false otherwise.
+ */
 static bool tomoyo_merge_path_number_acl(struct tomoyo_acl_info *a,
                                         struct tomoyo_acl_info *b,
                                         const bool is_delete)
@@ -841,33 +627,26 @@ static bool tomoyo_merge_path_number_acl(struct tomoyo_acl_info *a,
 /**
  * tomoyo_update_path_number_acl - Update ioctl/chmod/chown/chgrp ACL.
  *
- * @type:      Type of operation.
- * @filename:  Filename.
- * @number:    Number.
- * @domain:    Pointer to "struct tomoyo_domain_info".
- * @is_delete: True if it is a delete request.
+ * @perm:  Permission.
+ * @param: Pointer to "struct tomoyo_acl_param".
  *
  * Returns 0 on success, negative value otherwise.
  */
-static int tomoyo_update_path_number_acl(const u8 type, const char *filename,
-                                        char *number,
-                                        struct tomoyo_domain_info * const
-                                        domain,
-                                        const bool is_delete)
+static int tomoyo_update_path_number_acl(const u8 perm,
+                                        struct tomoyo_acl_param *param)
 {
        struct tomoyo_path_number_acl e = {
                .head.type = TOMOYO_TYPE_PATH_NUMBER_ACL,
-               .perm = 1 << type
+               .perm = perm
        };
-       int error = is_delete ? -ENOENT : -ENOMEM;
-       if (!tomoyo_parse_name_union(filename, &e.name))
-               return -EINVAL;
-       if (!tomoyo_parse_number_union(number, &e.number))
-               goto out;
-       error = tomoyo_update_domain(&e.head, sizeof(e), is_delete, domain,
-                                    tomoyo_same_path_number_acl,
-                                    tomoyo_merge_path_number_acl);
- out:
+       int error;
+       if (!tomoyo_parse_name_union(param, &e.name) ||
+           !tomoyo_parse_number_union(param, &e.number))
+               error = -EINVAL;
+       else
+               error = tomoyo_update_domain(&e.head, sizeof(e), param,
+                                            tomoyo_same_path_number_acl,
+                                            tomoyo_merge_path_number_acl);
        tomoyo_put_name_union(&e.name);
        tomoyo_put_number_union(&e.number);
        return error;
@@ -886,16 +665,20 @@ int tomoyo_path_number_perm(const u8 type, struct path *path,
                            unsigned long number)
 {
        struct tomoyo_request_info r;
+       struct tomoyo_obj_info obj = {
+               .path1 = *path,
+       };
        int error = -ENOMEM;
        struct tomoyo_path_info buf;
        int idx;
 
        if (tomoyo_init_request_info(&r, NULL, tomoyo_pn2mac[type])
-           == TOMOYO_CONFIG_DISABLED || !path->mnt || !path->dentry)
+           == TOMOYO_CONFIG_DISABLED || !path->dentry)
                return 0;
        idx = tomoyo_read_lock();
        if (!tomoyo_get_realpath(&buf, path))
                goto out;
+       r.obj = &obj;
        if (type == TOMOYO_TYPE_MKDIR)
                tomoyo_add_slash(&buf);
        r.param_type = TOMOYO_TYPE_PATH_NUMBER_ACL;
@@ -930,45 +713,30 @@ int tomoyo_check_open_permission(struct tomoyo_domain_info *domain,
        int error = 0;
        struct tomoyo_path_info buf;
        struct tomoyo_request_info r;
+       struct tomoyo_obj_info obj = {
+               .path1 = *path,
+       };
        int idx;
 
-       if (!path->mnt ||
-           (path->dentry->d_inode && S_ISDIR(path->dentry->d_inode->i_mode)))
-               return 0;
        buf.name = NULL;
        r.mode = TOMOYO_CONFIG_DISABLED;
        idx = tomoyo_read_lock();
-       /*
-        * If the filename is specified by "deny_rewrite" keyword,
-        * we need to check "allow_rewrite" permission when the filename is not
-        * opened for append mode or the filename is truncated at open time.
-        */
-       if ((acc_mode & MAY_WRITE) && !(flag & O_APPEND)
-           && tomoyo_init_request_info(&r, domain, TOMOYO_MAC_FILE_REWRITE)
+       if (acc_mode &&
+           tomoyo_init_request_info(&r, domain, TOMOYO_MAC_FILE_OPEN)
            != TOMOYO_CONFIG_DISABLED) {
                if (!tomoyo_get_realpath(&buf, path)) {
                        error = -ENOMEM;
                        goto out;
                }
-               if (tomoyo_no_rewrite_file(&buf))
-                       error = tomoyo_path_permission(&r, TOMOYO_TYPE_REWRITE,
+               r.obj = &obj;
+               if (acc_mode & MAY_READ)
+                       error = tomoyo_path_permission(&r, TOMOYO_TYPE_READ,
+                                                      &buf);
+               if (!error && (acc_mode & MAY_WRITE))
+                       error = tomoyo_path_permission(&r, (flag & O_APPEND) ?
+                                                      TOMOYO_TYPE_APPEND :
+                                                      TOMOYO_TYPE_WRITE,
                                                       &buf);
-       }
-       if (!error && acc_mode &&
-           tomoyo_init_request_info(&r, domain, TOMOYO_MAC_FILE_OPEN)
-           != TOMOYO_CONFIG_DISABLED) {
-               u8 operation;
-               if (!buf.name && !tomoyo_get_realpath(&buf, path)) {
-                       error = -ENOMEM;
-                       goto out;
-               }
-               if (acc_mode == (MAY_READ | MAY_WRITE))
-                       operation = TOMOYO_TYPE_READ_WRITE;
-               else if (acc_mode == MAY_READ)
-                       operation = TOMOYO_TYPE_READ;
-               else
-                       operation = TOMOYO_TYPE_WRITE;
-               error = tomoyo_path_permission(&r, operation, &buf);
        }
  out:
        kfree(buf.name);
@@ -979,46 +747,57 @@ int tomoyo_check_open_permission(struct tomoyo_domain_info *domain,
 }
 
 /**
- * tomoyo_path_perm - Check permission for "unlink", "rmdir", "truncate", "symlink", "rewrite", "chroot" and "unmount".
+ * tomoyo_path_perm - Check permission for "unlink", "rmdir", "truncate", "symlink", "append", "chroot" and "unmount".
  *
  * @operation: Type of operation.
  * @path:      Pointer to "struct path".
+ * @target:    Symlink's target if @operation is TOMOYO_TYPE_SYMLINK,
+ *             NULL otherwise.
  *
  * Returns 0 on success, negative value otherwise.
  */
-int tomoyo_path_perm(const u8 operation, struct path *path)
+int tomoyo_path_perm(const u8 operation, struct path *path, const char *target)
 {
-       int error = -ENOMEM;
-       struct tomoyo_path_info buf;
        struct tomoyo_request_info r;
+       struct tomoyo_obj_info obj = {
+               .path1 = *path,
+       };
+       int error;
+       struct tomoyo_path_info buf;
+       bool is_enforce;
+       struct tomoyo_path_info symlink_target;
        int idx;
 
-       if (!path->mnt)
-               return 0;
        if (tomoyo_init_request_info(&r, NULL, tomoyo_p2mac[operation])
            == TOMOYO_CONFIG_DISABLED)
                return 0;
+       is_enforce = (r.mode == TOMOYO_CONFIG_ENFORCING);
+       error = -ENOMEM;
        buf.name = NULL;
        idx = tomoyo_read_lock();
        if (!tomoyo_get_realpath(&buf, path))
                goto out;
+       r.obj = &obj;
        switch (operation) {
-       case TOMOYO_TYPE_REWRITE:
-               if (!tomoyo_no_rewrite_file(&buf)) {
-                       error = 0;
-                       goto out;
-               }
-               break;
        case TOMOYO_TYPE_RMDIR:
        case TOMOYO_TYPE_CHROOT:
                tomoyo_add_slash(&buf);
                break;
+       case TOMOYO_TYPE_SYMLINK:
+               symlink_target.name = tomoyo_encode(target);
+               if (!symlink_target.name)
+                       goto out;
+               tomoyo_fill_path_info(&symlink_target);
+               obj.symlink_target = &symlink_target;
+               break;
        }
        error = tomoyo_path_permission(&r, operation, &buf);
+       if (operation == TOMOYO_TYPE_SYMLINK)
+               kfree(symlink_target.name);
  out:
        kfree(buf.name);
        tomoyo_read_unlock(idx);
-       if (r.mode != TOMOYO_CONFIG_ENFORCING)
+       if (!is_enforce)
                error = 0;
        return error;
 }
@@ -1034,20 +813,23 @@ int tomoyo_path_perm(const u8 operation, struct path *path)
  * Returns 0 on success, negative value otherwise.
  */
 int tomoyo_mkdev_perm(const u8 operation, struct path *path,
-                            const unsigned int mode, unsigned int dev)
+                     const unsigned int mode, unsigned int dev)
 {
        struct tomoyo_request_info r;
+       struct tomoyo_obj_info obj = {
+               .path1 = *path,
+       };
        int error = -ENOMEM;
        struct tomoyo_path_info buf;
        int idx;
 
-       if (!path->mnt ||
-           tomoyo_init_request_info(&r, NULL, tomoyo_pnnn2mac[operation])
+       if (tomoyo_init_request_info(&r, NULL, tomoyo_pnnn2mac[operation])
            == TOMOYO_CONFIG_DISABLED)
                return 0;
        idx = tomoyo_read_lock();
        error = -ENOMEM;
        if (tomoyo_get_realpath(&buf, path)) {
+               r.obj = &obj;
                dev = new_decode_dev(dev);
                r.param_type = TOMOYO_TYPE_MKDEV_ACL;
                r.param.mkdev.filename = &buf;
@@ -1081,10 +863,13 @@ int tomoyo_path2_perm(const u8 operation, struct path *path1,
        struct tomoyo_path_info buf1;
        struct tomoyo_path_info buf2;
        struct tomoyo_request_info r;
+       struct tomoyo_obj_info obj = {
+               .path1 = *path1,
+               .path2 = *path2,
+       };
        int idx;
 
-       if (!path1->mnt || !path2->mnt ||
-           tomoyo_init_request_info(&r, NULL, tomoyo_pp2mac[operation])
+       if (tomoyo_init_request_info(&r, NULL, tomoyo_pp2mac[operation])
            == TOMOYO_CONFIG_DISABLED)
                return 0;
        buf1.name = NULL;
@@ -1096,16 +881,17 @@ int tomoyo_path2_perm(const u8 operation, struct path *path1,
        switch (operation) {
                struct dentry *dentry;
        case TOMOYO_TYPE_RENAME:
-        case TOMOYO_TYPE_LINK:
+       case TOMOYO_TYPE_LINK:
                dentry = path1->dentry;
-               if (!dentry->d_inode || !S_ISDIR(dentry->d_inode->i_mode))
-                        break;
-                /* fall through */
-        case TOMOYO_TYPE_PIVOT_ROOT:
-                tomoyo_add_slash(&buf1);
-                tomoyo_add_slash(&buf2);
+               if (!dentry->d_inode || !S_ISDIR(dentry->d_inode->i_mode))
+                       break;
+               /* fall through */
+       case TOMOYO_TYPE_PIVOT_ROOT:
+               tomoyo_add_slash(&buf1);
+               tomoyo_add_slash(&buf2);
                break;
-        }
+       }
+       r.obj = &obj;
        r.param_type = TOMOYO_TYPE_PATH2_ACL;
        r.param.path2.operation = operation;
        r.param.path2.filename1 = &buf1;
@@ -1124,53 +910,91 @@ int tomoyo_path2_perm(const u8 operation, struct path *path1,
 }
 
 /**
+ * tomoyo_same_mount_acl - Check for duplicated "struct tomoyo_mount_acl" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_info".
+ * @b: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
+static bool tomoyo_same_mount_acl(const struct tomoyo_acl_info *a,
+                                 const struct tomoyo_acl_info *b)
+{
+       const struct tomoyo_mount_acl *p1 = container_of(a, typeof(*p1), head);
+       const struct tomoyo_mount_acl *p2 = container_of(b, typeof(*p2), head);
+       return tomoyo_same_name_union(&p1->dev_name, &p2->dev_name) &&
+               tomoyo_same_name_union(&p1->dir_name, &p2->dir_name) &&
+               tomoyo_same_name_union(&p1->fs_type, &p2->fs_type) &&
+               tomoyo_same_number_union(&p1->flags, &p2->flags);
+}
+
+/**
+ * tomoyo_update_mount_acl - Write "struct tomoyo_mount_acl" list.
+ *
+ * @param: Pointer to "struct tomoyo_acl_param".
+ *
+ * Returns 0 on success, negative value otherwise.
+ *
+ * Caller holds tomoyo_read_lock().
+ */
+static int tomoyo_update_mount_acl(struct tomoyo_acl_param *param)
+{
+       struct tomoyo_mount_acl e = { .head.type = TOMOYO_TYPE_MOUNT_ACL };
+       int error;
+       if (!tomoyo_parse_name_union(param, &e.dev_name) ||
+           !tomoyo_parse_name_union(param, &e.dir_name) ||
+           !tomoyo_parse_name_union(param, &e.fs_type) ||
+           !tomoyo_parse_number_union(param, &e.flags))
+               error = -EINVAL;
+       else
+               error = tomoyo_update_domain(&e.head, sizeof(e), param,
+                                            tomoyo_same_mount_acl, NULL);
+       tomoyo_put_name_union(&e.dev_name);
+       tomoyo_put_name_union(&e.dir_name);
+       tomoyo_put_name_union(&e.fs_type);
+       tomoyo_put_number_union(&e.flags);
+       return error;
+}
+
+/**
  * tomoyo_write_file - Update file related list.
  *
- * @data:      String to parse.
- * @domain:    Pointer to "struct tomoyo_domain_info".
- * @is_delete: True if it is a delete request.
+ * @param: Pointer to "struct tomoyo_acl_param".
  *
  * Returns 0 on success, negative value otherwise.
  *
  * Caller holds tomoyo_read_lock().
  */
-int tomoyo_write_file(char *data, struct tomoyo_domain_info *domain,
-                     const bool is_delete)
+int tomoyo_write_file(struct tomoyo_acl_param *param)
 {
-       char *w[5];
+       u16 perm = 0;
        u8 type;
-       if (!tomoyo_tokenize(data, w, sizeof(w)) || !w[1][0])
-               return -EINVAL;
-       if (strncmp(w[0], "allow_", 6))
-               goto out;
-       w[0] += 6;
-       for (type = 0; type < TOMOYO_MAX_PATH_OPERATION; type++) {
-               if (strcmp(w[0], tomoyo_path_keyword[type]))
-                       continue;
-               return tomoyo_update_path_acl(type, w[1], domain, is_delete);
-       }
-       if (!w[2][0])
-               goto out;
-       for (type = 0; type < TOMOYO_MAX_PATH2_OPERATION; type++) {
-               if (strcmp(w[0], tomoyo_path2_keyword[type]))
-                       continue;
-               return tomoyo_update_path2_acl(type, w[1], w[2], domain,
-                                              is_delete);
-       }
-       for (type = 0; type < TOMOYO_MAX_PATH_NUMBER_OPERATION; type++) {
-               if (strcmp(w[0], tomoyo_path_number_keyword[type]))
-                       continue;
-               return tomoyo_update_path_number_acl(type, w[1], w[2], domain,
-                                                    is_delete);
-       }
-       if (!w[3][0] || !w[4][0])
-               goto out;
-       for (type = 0; type < TOMOYO_MAX_MKDEV_OPERATION; type++) {
-               if (strcmp(w[0], tomoyo_mkdev_keyword[type]))
-                       continue;
-               return tomoyo_update_mkdev_acl(type, w[1], w[2], w[3],
-                                              w[4], domain, is_delete);
-       }
- out:
+       const char *operation = tomoyo_read_token(param);
+       for (type = 0; type < TOMOYO_MAX_PATH_OPERATION; type++)
+               if (tomoyo_permstr(operation, tomoyo_path_keyword[type]))
+                       perm |= 1 << type;
+       if (perm)
+               return tomoyo_update_path_acl(perm, param);
+       for (type = 0; type < TOMOYO_MAX_PATH2_OPERATION; type++)
+               if (tomoyo_permstr(operation,
+                                  tomoyo_mac_keywords[tomoyo_pp2mac[type]]))
+                       perm |= 1 << type;
+       if (perm)
+               return tomoyo_update_path2_acl(perm, param);
+       for (type = 0; type < TOMOYO_MAX_PATH_NUMBER_OPERATION; type++)
+               if (tomoyo_permstr(operation,
+                                  tomoyo_mac_keywords[tomoyo_pn2mac[type]]))
+                       perm |= 1 << type;
+       if (perm)
+               return tomoyo_update_path_number_acl(perm, param);
+       for (type = 0; type < TOMOYO_MAX_MKDEV_OPERATION; type++)
+               if (tomoyo_permstr(operation,
+                                  tomoyo_mac_keywords[tomoyo_pnnn2mac[type]]))
+                       perm |= 1 << type;
+       if (perm)
+               return tomoyo_update_mkdev_acl(perm, param);
+       if (tomoyo_permstr(operation,
+                          tomoyo_mac_keywords[TOMOYO_MAC_FILE_MOUNT]))
+               return tomoyo_update_mount_acl(param);
        return -EINVAL;
 }
index a877e4c..ae135fb 100644 (file)
 /*
  * security/tomoyo/gc.c
  *
- * Implementation of the Domain-Based Mandatory Access Control.
- *
- * Copyright (C) 2005-2010  NTT DATA CORPORATION
- *
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
  */
 
 #include "common.h"
 #include <linux/kthread.h>
 #include <linux/slab.h>
 
+/* The list for "struct tomoyo_io_buffer". */
+static LIST_HEAD(tomoyo_io_buffer_list);
+/* Lock for protecting tomoyo_io_buffer_list. */
+static DEFINE_SPINLOCK(tomoyo_io_buffer_list_lock);
+
+/* Size of an element. */
+static const u8 tomoyo_element_size[TOMOYO_MAX_POLICY] = {
+       [TOMOYO_ID_GROUP] = sizeof(struct tomoyo_group),
+       [TOMOYO_ID_PATH_GROUP] = sizeof(struct tomoyo_path_group),
+       [TOMOYO_ID_NUMBER_GROUP] = sizeof(struct tomoyo_number_group),
+       [TOMOYO_ID_AGGREGATOR] = sizeof(struct tomoyo_aggregator),
+       [TOMOYO_ID_TRANSITION_CONTROL] =
+       sizeof(struct tomoyo_transition_control),
+       [TOMOYO_ID_MANAGER] = sizeof(struct tomoyo_manager),
+       /* [TOMOYO_ID_CONDITION] = "struct tomoyo_condition"->size, */
+       /* [TOMOYO_ID_NAME] = "struct tomoyo_name"->size, */
+       /* [TOMOYO_ID_ACL] =
+          tomoyo_acl_size["struct tomoyo_acl_info"->type], */
+       [TOMOYO_ID_DOMAIN] = sizeof(struct tomoyo_domain_info),
+};
+
+/* Size of a domain ACL element. */
+static const u8 tomoyo_acl_size[] = {
+       [TOMOYO_TYPE_PATH_ACL] = sizeof(struct tomoyo_path_acl),
+       [TOMOYO_TYPE_PATH2_ACL] = sizeof(struct tomoyo_path2_acl),
+       [TOMOYO_TYPE_PATH_NUMBER_ACL] = sizeof(struct tomoyo_path_number_acl),
+       [TOMOYO_TYPE_MKDEV_ACL] = sizeof(struct tomoyo_mkdev_acl),
+       [TOMOYO_TYPE_MOUNT_ACL] = sizeof(struct tomoyo_mount_acl),
+};
+
+/**
+ * tomoyo_struct_used_by_io_buffer - Check whether the list element is used by /sys/kernel/security/tomoyo/ users or not.
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns true if @element is used by /sys/kernel/security/tomoyo/ users,
+ * false otherwise.
+ */
+static bool tomoyo_struct_used_by_io_buffer(const struct list_head *element)
+{
+       struct tomoyo_io_buffer *head;
+       bool in_use = false;
+
+       spin_lock(&tomoyo_io_buffer_list_lock);
+       list_for_each_entry(head, &tomoyo_io_buffer_list, list) {
+               head->users++;
+               spin_unlock(&tomoyo_io_buffer_list_lock);
+               if (mutex_lock_interruptible(&head->io_sem)) {
+                       in_use = true;
+                       goto out;
+               }
+               if (head->r.domain == element || head->r.group == element ||
+                   head->r.acl == element || &head->w.domain->list == element)
+                       in_use = true;
+               mutex_unlock(&head->io_sem);
+out:
+               spin_lock(&tomoyo_io_buffer_list_lock);
+               head->users--;
+               if (in_use)
+                       break;
+       }
+       spin_unlock(&tomoyo_io_buffer_list_lock);
+       return in_use;
+}
+
+/**
+ * tomoyo_name_used_by_io_buffer - Check whether the string is used by /sys/kernel/security/tomoyo/ users or not.
+ *
+ * @string: String to check.
+ * @size:   Memory allocated for @string .
+ *
+ * Returns true if @string is used by /sys/kernel/security/tomoyo/ users,
+ * false otherwise.
+ */
+static bool tomoyo_name_used_by_io_buffer(const char *string,
+                                         const size_t size)
+{
+       struct tomoyo_io_buffer *head;
+       bool in_use = false;
+
+       spin_lock(&tomoyo_io_buffer_list_lock);
+       list_for_each_entry(head, &tomoyo_io_buffer_list, list) {
+               int i;
+               head->users++;
+               spin_unlock(&tomoyo_io_buffer_list_lock);
+               if (mutex_lock_interruptible(&head->io_sem)) {
+                       in_use = true;
+                       goto out;
+               }
+               for (i = 0; i < TOMOYO_MAX_IO_READ_QUEUE; i++) {
+                       const char *w = head->r.w[i];
+                       if (w < string || w > string + size)
+                               continue;
+                       in_use = true;
+                       break;
+               }
+               mutex_unlock(&head->io_sem);
+out:
+               spin_lock(&tomoyo_io_buffer_list_lock);
+               head->users--;
+               if (in_use)
+                       break;
+       }
+       spin_unlock(&tomoyo_io_buffer_list_lock);
+       return in_use;
+}
+
+/* Structure for garbage collection. */
 struct tomoyo_gc {
        struct list_head list;
-       int type;
+       enum tomoyo_policy_id type;
+       size_t size;
        struct list_head *element;
 };
-static LIST_HEAD(tomoyo_gc_queue);
-static DEFINE_MUTEX(tomoyo_gc_mutex);
+/* List of entries to be deleted. */
+static LIST_HEAD(tomoyo_gc_list);
+/* Length of tomoyo_gc_list. */
+static int tomoyo_gc_list_len;
 
-/* Caller holds tomoyo_policy_lock mutex. */
+/**
+ * tomoyo_add_to_gc - Add an entry to to be deleted list.
+ *
+ * @type:    One of values in "enum tomoyo_policy_id".
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns true on success, false otherwise.
+ *
+ * Caller holds tomoyo_policy_lock mutex.
+ *
+ * Adding an entry needs kmalloc(). Thus, if we try to add thousands of
+ * entries at once, it will take too long time. Thus, do not add more than 128
+ * entries per a scan. But to be able to handle worst case where all entries
+ * are in-use, we accept one more entry per a scan.
+ *
+ * If we use singly linked list using "struct list_head"->prev (which is
+ * LIST_POISON2), we can avoid kmalloc().
+ */
 static bool tomoyo_add_to_gc(const int type, struct list_head *element)
 {
        struct tomoyo_gc *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
        if (!entry)
                return false;
        entry->type = type;
+       if (type == TOMOYO_ID_ACL)
+               entry->size = tomoyo_acl_size[
+                             container_of(element,
+                                          typeof(struct tomoyo_acl_info),
+                                          list)->type];
+       else if (type == TOMOYO_ID_NAME)
+               entry->size = strlen(container_of(element,
+                                                 typeof(struct tomoyo_name),
+                                                 head.list)->entry.name) + 1;
+       else if (type == TOMOYO_ID_CONDITION)
+               entry->size =
+                       container_of(element, typeof(struct tomoyo_condition),
+                                    head.list)->size;
+       else
+               entry->size = tomoyo_element_size[type];
        entry->element = element;
-       list_add(&entry->list, &tomoyo_gc_queue);
+       list_add(&entry->list, &tomoyo_gc_list);
        list_del_rcu(element);
-       return true;
+       return tomoyo_gc_list_len++ < 128;
 }
 
-static void tomoyo_del_allow_read(struct list_head *element)
-{
-       struct tomoyo_readable_file *ptr =
-               container_of(element, typeof(*ptr), head.list);
-       tomoyo_put_name(ptr->filename);
-}
-
-static void tomoyo_del_file_pattern(struct list_head *element)
-{
-       struct tomoyo_no_pattern *ptr =
-               container_of(element, typeof(*ptr), head.list);
-       tomoyo_put_name(ptr->pattern);
-}
-
-static void tomoyo_del_no_rewrite(struct list_head *element)
+/**
+ * tomoyo_element_linked_by_gc - Validate next element of an entry.
+ *
+ * @element: Pointer to an element.
+ * @size:    Size of @element in byte.
+ *
+ * Returns true if @element is linked by other elements in the garbage
+ * collector's queue, false otherwise.
+ */
+static bool tomoyo_element_linked_by_gc(const u8 *element, const size_t size)
 {
-       struct tomoyo_no_rewrite *ptr =
-               container_of(element, typeof(*ptr), head.list);
-       tomoyo_put_name(ptr->pattern);
+       struct tomoyo_gc *p;
+       list_for_each_entry(p, &tomoyo_gc_list, list) {
+               const u8 *ptr = (const u8 *) p->element->next;
+               if (ptr < element || element + size < ptr)
+                       continue;
+               return true;
+       }
+       return false;
 }
 
+/**
+ * tomoyo_del_transition_control - Delete members in "struct tomoyo_transition_control".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
 static void tomoyo_del_transition_control(struct list_head *element)
 {
        struct tomoyo_transition_control *ptr =
@@ -61,6 +208,13 @@ static void tomoyo_del_transition_control(struct list_head *element)
        tomoyo_put_name(ptr->program);
 }
 
+/**
+ * tomoyo_del_aggregator - Delete members in "struct tomoyo_aggregator".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
 static void tomoyo_del_aggregator(struct list_head *element)
 {
        struct tomoyo_aggregator *ptr =
@@ -69,6 +223,13 @@ static void tomoyo_del_aggregator(struct list_head *element)
        tomoyo_put_name(ptr->aggregated_name);
 }
 
+/**
+ * tomoyo_del_manager - Delete members in "struct tomoyo_manager".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
 static void tomoyo_del_manager(struct list_head *element)
 {
        struct tomoyo_manager *ptr =
@@ -76,10 +237,18 @@ static void tomoyo_del_manager(struct list_head *element)
        tomoyo_put_name(ptr->manager);
 }
 
+/**
+ * tomoyo_del_acl - Delete members in "struct tomoyo_acl_info".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
 static void tomoyo_del_acl(struct list_head *element)
 {
        struct tomoyo_acl_info *acl =
                container_of(element, typeof(*acl), list);
+       tomoyo_put_condition(acl->cond);
        switch (acl->type) {
        case TOMOYO_TYPE_PATH_ACL:
                {
@@ -127,6 +296,13 @@ static void tomoyo_del_acl(struct list_head *element)
        }
 }
 
+/**
+ * tomoyo_del_domain - Delete members in "struct tomoyo_domain_info".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns true if deleted, false otherwise.
+ */
 static bool tomoyo_del_domain(struct list_head *element)
 {
        struct tomoyo_domain_info *domain =
@@ -165,13 +341,65 @@ static bool tomoyo_del_domain(struct list_head *element)
        return true;
 }
 
+/**
+ * tomoyo_del_condition - Delete members in "struct tomoyo_condition".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
+void tomoyo_del_condition(struct list_head *element)
+{
+       struct tomoyo_condition *cond = container_of(element, typeof(*cond),
+                                                    head.list);
+       const u16 condc = cond->condc;
+       const u16 numbers_count = cond->numbers_count;
+       const u16 names_count = cond->names_count;
+       const u16 argc = cond->argc;
+       const u16 envc = cond->envc;
+       unsigned int i;
+       const struct tomoyo_condition_element *condp
+               = (const struct tomoyo_condition_element *) (cond + 1);
+       struct tomoyo_number_union *numbers_p
+               = (struct tomoyo_number_union *) (condp + condc);
+       struct tomoyo_name_union *names_p
+               = (struct tomoyo_name_union *) (numbers_p + numbers_count);
+       const struct tomoyo_argv *argv
+               = (const struct tomoyo_argv *) (names_p + names_count);
+       const struct tomoyo_envp *envp
+               = (const struct tomoyo_envp *) (argv + argc);
+       for (i = 0; i < numbers_count; i++)
+               tomoyo_put_number_union(numbers_p++);
+       for (i = 0; i < names_count; i++)
+               tomoyo_put_name_union(names_p++);
+       for (i = 0; i < argc; argv++, i++)
+               tomoyo_put_name(argv->value);
+       for (i = 0; i < envc; envp++, i++) {
+               tomoyo_put_name(envp->name);
+               tomoyo_put_name(envp->value);
+       }
+}
 
+/**
+ * tomoyo_del_name - Delete members in "struct tomoyo_name".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
 static void tomoyo_del_name(struct list_head *element)
 {
        const struct tomoyo_name *ptr =
-               container_of(element, typeof(*ptr), list);
+               container_of(element, typeof(*ptr), head.list);
 }
 
+/**
+ * tomoyo_del_path_group - Delete members in "struct tomoyo_path_group".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
 static void tomoyo_del_path_group(struct list_head *element)
 {
        struct tomoyo_path_group *member =
@@ -179,20 +407,43 @@ static void tomoyo_del_path_group(struct list_head *element)
        tomoyo_put_name(member->member_name);
 }
 
+/**
+ * tomoyo_del_group - Delete "struct tomoyo_group".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
 static void tomoyo_del_group(struct list_head *element)
 {
        struct tomoyo_group *group =
-               container_of(element, typeof(*group), list);
+               container_of(element, typeof(*group), head.list);
        tomoyo_put_name(group->group_name);
 }
 
+/**
+ * tomoyo_del_number_group - Delete members in "struct tomoyo_number_group".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
 static void tomoyo_del_number_group(struct list_head *element)
 {
        struct tomoyo_number_group *member =
                container_of(element, typeof(*member), head.list);
 }
 
-static bool tomoyo_collect_member(struct list_head *member_list, int id)
+/**
+ * tomoyo_collect_member - Delete elements with "struct tomoyo_acl_head".
+ *
+ * @id:          One of values in "enum tomoyo_policy_id".
+ * @member_list: Pointer to "struct list_head".
+ *
+ * Returns true if some elements are deleted, false otherwise.
+ */
+static bool tomoyo_collect_member(const enum tomoyo_policy_id id,
+                                 struct list_head *member_list)
 {
        struct tomoyo_acl_head *member;
        list_for_each_entry(member, member_list, list) {
@@ -201,13 +452,20 @@ static bool tomoyo_collect_member(struct list_head *member_list, int id)
                if (!tomoyo_add_to_gc(id, &member->list))
                        return false;
        }
-        return true;
+       return true;
 }
 
-static bool tomoyo_collect_acl(struct tomoyo_domain_info *domain)
+/**
+ * tomoyo_collect_acl - Delete elements in "struct tomoyo_domain_info".
+ *
+ * @list: Pointer to "struct list_head".
+ *
+ * Returns true if some elements are deleted, false otherwise.
+ */
+static bool tomoyo_collect_acl(struct list_head *list)
 {
        struct tomoyo_acl_info *acl;
-       list_for_each_entry(acl, &domain->acl_info_list, list) {
+       list_for_each_entry(acl, list, list) {
                if (!acl->is_deleted)
                        continue;
                if (!tomoyo_add_to_gc(TOMOYO_ID_ACL, &acl->list))
@@ -216,19 +474,24 @@ static bool tomoyo_collect_acl(struct tomoyo_domain_info *domain)
        return true;
 }
 
+/**
+ * tomoyo_collect_entry - Scan lists for deleted elements.
+ *
+ * Returns nothing.
+ */
 static void tomoyo_collect_entry(void)
 {
        int i;
+       enum tomoyo_policy_id id;
+       struct tomoyo_policy_namespace *ns;
+       int idx;
        if (mutex_lock_interruptible(&tomoyo_policy_lock))
                return;
-       for (i = 0; i < TOMOYO_MAX_POLICY; i++) {
-               if (!tomoyo_collect_member(&tomoyo_policy_list[i], i))
-                       goto unlock;
-       }
+       idx = tomoyo_read_lock();
        {
                struct tomoyo_domain_info *domain;
                list_for_each_entry_rcu(domain, &tomoyo_domain_list, list) {
-                       if (!tomoyo_collect_acl(domain))
+                       if (!tomoyo_collect_acl(&domain->acl_info_list))
                                goto unlock;
                        if (!domain->is_deleted || atomic_read(&domain->users))
                                continue;
@@ -241,48 +504,93 @@ static void tomoyo_collect_entry(void)
                                goto unlock;
                }
        }
-       for (i = 0; i < TOMOYO_MAX_HASH; i++) {
-               struct tomoyo_name *ptr;
-               list_for_each_entry_rcu(ptr, &tomoyo_name_list[i], list) {
-                       if (atomic_read(&ptr->users))
-                               continue;
-                       if (!tomoyo_add_to_gc(TOMOYO_ID_NAME, &ptr->list))
+       list_for_each_entry_rcu(ns, &tomoyo_namespace_list, namespace_list) {
+               for (id = 0; id < TOMOYO_MAX_POLICY; id++)
+                       if (!tomoyo_collect_member(id, &ns->policy_list[id]))
                                goto unlock;
+               for (i = 0; i < TOMOYO_MAX_ACL_GROUPS; i++)
+                       if (!tomoyo_collect_acl(&ns->acl_group[i]))
+                               goto unlock;
+               for (i = 0; i < TOMOYO_MAX_GROUP; i++) {
+                       struct list_head *list = &ns->group_list[i];
+                       struct tomoyo_group *group;
+                       switch (i) {
+                       case 0:
+                               id = TOMOYO_ID_PATH_GROUP;
+                               break;
+                       default:
+                               id = TOMOYO_ID_NUMBER_GROUP;
+                               break;
+                       }
+                       list_for_each_entry(group, list, head.list) {
+                               if (!tomoyo_collect_member
+                                   (id, &group->member_list))
+                                       goto unlock;
+                               if (!list_empty(&group->member_list) ||
+                                   atomic_read(&group->head.users))
+                                       continue;
+                               if (!tomoyo_add_to_gc(TOMOYO_ID_GROUP,
+                                                     &group->head.list))
+                                       goto unlock;
+                       }
                }
        }
-       for (i = 0; i < TOMOYO_MAX_GROUP; i++) {
-               struct list_head *list = &tomoyo_group_list[i];
-               int id;
-               struct tomoyo_group *group;
-               switch (i) {
-               case 0:
-                       id = TOMOYO_ID_PATH_GROUP;
-                       break;
-               default:
-                       id = TOMOYO_ID_NUMBER_GROUP;
-                       break;
-               }
-               list_for_each_entry(group, list, list) {
-                       if (!tomoyo_collect_member(&group->member_list, id))
-                               goto unlock;
-                       if (!list_empty(&group->member_list) ||
-                           atomic_read(&group->users))
+       id = TOMOYO_ID_CONDITION;
+       for (i = 0; i < TOMOYO_MAX_HASH + 1; i++) {
+               struct list_head *list = !i ?
+                       &tomoyo_condition_list : &tomoyo_name_list[i - 1];
+               struct tomoyo_shared_acl_head *ptr;
+               list_for_each_entry(ptr, list, list) {
+                       if (atomic_read(&ptr->users))
                                continue;
-                       if (!tomoyo_add_to_gc(TOMOYO_ID_GROUP, &group->list))
+                       if (!tomoyo_add_to_gc(id, &ptr->list))
                                goto unlock;
                }
+               id = TOMOYO_ID_NAME;
        }
- unlock:
+unlock:
+       tomoyo_read_unlock(idx);
        mutex_unlock(&tomoyo_policy_lock);
 }
 
-static void tomoyo_kfree_entry(void)
+/**
+ * tomoyo_kfree_entry - Delete entries in tomoyo_gc_list.
+ *
+ * Returns true if some entries were kfree()d, false otherwise.
+ */
+static bool tomoyo_kfree_entry(void)
 {
        struct tomoyo_gc *p;
        struct tomoyo_gc *tmp;
+       bool result = false;
 
-       list_for_each_entry_safe(p, tmp, &tomoyo_gc_queue, list) {
+       list_for_each_entry_safe(p, tmp, &tomoyo_gc_list, list) {
                struct list_head *element = p->element;
+
+               /*
+                * list_del_rcu() in tomoyo_add_to_gc() guarantees that the
+                * list element became no longer reachable from the list which
+                * the element was originally on (e.g. tomoyo_domain_list).
+                * Also, synchronize_srcu() in tomoyo_gc_thread() guarantees
+                * that the list element became no longer referenced by syscall
+                * users.
+                *
+                * However, there are three users which may still be using the
+                * list element. We need to defer until all of these users
+                * forget the list element.
+                *
+                * Firstly, defer until "struct tomoyo_io_buffer"->r.{domain,
+                * group,acl} and "struct tomoyo_io_buffer"->w.domain forget
+                * the list element.
+                */
+               if (tomoyo_struct_used_by_io_buffer(element))
+                       continue;
+               /*
+                * Secondly, defer until all other elements in the
+                * tomoyo_gc_list list forget the list element.
+                */
+               if (tomoyo_element_linked_by_gc((const u8 *) element, p->size))
+                       continue;
                switch (p->type) {
                case TOMOYO_ID_TRANSITION_CONTROL:
                        tomoyo_del_transition_control(element);
@@ -290,19 +598,21 @@ static void tomoyo_kfree_entry(void)
                case TOMOYO_ID_AGGREGATOR:
                        tomoyo_del_aggregator(element);
                        break;
-               case TOMOYO_ID_GLOBALLY_READABLE:
-                       tomoyo_del_allow_read(element);
-                       break;
-               case TOMOYO_ID_PATTERN:
-                       tomoyo_del_file_pattern(element);
-                       break;
-               case TOMOYO_ID_NO_REWRITE:
-                       tomoyo_del_no_rewrite(element);
-                       break;
                case TOMOYO_ID_MANAGER:
                        tomoyo_del_manager(element);
                        break;
+               case TOMOYO_ID_CONDITION:
+                       tomoyo_del_condition(element);
+                       break;
                case TOMOYO_ID_NAME:
+                       /*
+                        * Thirdly, defer until all "struct tomoyo_io_buffer"
+                        * ->r.w[] forget the list element.
+                        */
+                       if (tomoyo_name_used_by_io_buffer(
+                           container_of(element, typeof(struct tomoyo_name),
+                                        head.list)->entry.name, p->size))
+                               continue;
                        tomoyo_del_name(element);
                        break;
                case TOMOYO_ID_ACL:
@@ -321,34 +631,95 @@ static void tomoyo_kfree_entry(void)
                case TOMOYO_ID_NUMBER_GROUP:
                        tomoyo_del_number_group(element);
                        break;
+               case TOMOYO_MAX_POLICY:
+                       break;
                }
                tomoyo_memory_free(element);
                list_del(&p->list);
                kfree(p);
+               tomoyo_gc_list_len--;
+               result = true;
        }
+       return result;
 }
 
+/**
+ * tomoyo_gc_thread - Garbage collector thread function.
+ *
+ * @unused: Unused.
+ *
+ * In case the OOM-killer chooses this thread for termination, we create this
+ * thread as a short-lived thread whenever the /sys/kernel/security/tomoyo/
+ * interface is close()d.
+ *
+ * Returns 0.
+ */
 static int tomoyo_gc_thread(void *unused)
 {
+       /* Garbage collector thread is exclusive. */
+       static DEFINE_MUTEX(tomoyo_gc_mutex);
+       if (!mutex_trylock(&tomoyo_gc_mutex))
+               goto out;
        daemonize("GC for TOMOYO");
-       if (mutex_trylock(&tomoyo_gc_mutex)) {
-               int i;
-               for (i = 0; i < 10; i++) {
-                       tomoyo_collect_entry();
-                       if (list_empty(&tomoyo_gc_queue))
-                               break;
-                       synchronize_srcu(&tomoyo_ss);
-                       tomoyo_kfree_entry();
+       do {
+               tomoyo_collect_entry();
+               if (list_empty(&tomoyo_gc_list))
+                       break;
+               synchronize_srcu(&tomoyo_ss);
+       } while (tomoyo_kfree_entry());
+       {
+               struct tomoyo_io_buffer *head;
+               struct tomoyo_io_buffer *tmp;
+
+               spin_lock(&tomoyo_io_buffer_list_lock);
+               list_for_each_entry_safe(head, tmp, &tomoyo_io_buffer_list,
+                                        list) {
+                       if (head->users)
+                               continue;
+                       list_del(&head->list);
+                       kfree(head->read_buf);
+                       kfree(head->write_buf);
+                       kfree(head);
                }
-               mutex_unlock(&tomoyo_gc_mutex);
+               spin_unlock(&tomoyo_io_buffer_list_lock);
        }
-       do_exit(0);
+       mutex_unlock(&tomoyo_gc_mutex);
+out:
+       /* This acts as do_exit(0). */
+       return 0;
 }
 
-void tomoyo_run_gc(void)
+/**
+ * tomoyo_notify_gc - Register/unregister /sys/kernel/security/tomoyo/ users.
+ *
+ * @head:        Pointer to "struct tomoyo_io_buffer".
+ * @is_register: True if register, false if unregister.
+ *
+ * Returns nothing.
+ */
+void tomoyo_notify_gc(struct tomoyo_io_buffer *head, const bool is_register)
 {
-       struct task_struct *task = kthread_create(tomoyo_gc_thread, NULL,
-                                                 "GC for TOMOYO");
-       if (!IS_ERR(task))
-               wake_up_process(task);
+       bool is_write = false;
+
+       spin_lock(&tomoyo_io_buffer_list_lock);
+       if (is_register) {
+               head->users = 1;
+               list_add(&head->list, &tomoyo_io_buffer_list);
+       } else {
+               is_write = head->write_buf != NULL;
+               if (!--head->users) {
+                       list_del(&head->list);
+                       kfree(head->read_buf);
+                       kfree(head->write_buf);
+                       kfree(head);
+               }
+       }
+       spin_unlock(&tomoyo_io_buffer_list_lock);
+       if (is_write) {
+               struct task_struct *task = kthread_create(tomoyo_gc_thread,
+                                                         NULL,
+                                                         "GC for TOMOYO");
+               if (!IS_ERR(task))
+                       wake_up_process(task);
+       }
 }
index e94352c..5fb0e12 100644 (file)
@@ -1,21 +1,37 @@
 /*
  * security/tomoyo/group.c
  *
- * Copyright (C) 2005-2010  NTT DATA CORPORATION
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
  */
 
 #include <linux/slab.h>
 #include "common.h"
 
+/**
+ * tomoyo_same_path_group - Check for duplicated "struct tomoyo_path_group" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_head".
+ * @b: Pointer to "struct tomoyo_acl_head".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
 static bool tomoyo_same_path_group(const struct tomoyo_acl_head *a,
-                               const struct tomoyo_acl_head *b)
+                                  const struct tomoyo_acl_head *b)
 {
        return container_of(a, struct tomoyo_path_group, head)->member_name ==
                container_of(b, struct tomoyo_path_group, head)->member_name;
 }
 
+/**
+ * tomoyo_same_number_group - Check for duplicated "struct tomoyo_number_group" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_head".
+ * @b: Pointer to "struct tomoyo_acl_head".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
 static bool tomoyo_same_number_group(const struct tomoyo_acl_head *a,
-                                 const struct tomoyo_acl_head *b)
+                                    const struct tomoyo_acl_head *b)
 {
        return !memcmp(&container_of(a, struct tomoyo_number_group, head)
                       ->number,
@@ -28,48 +44,41 @@ static bool tomoyo_same_number_group(const struct tomoyo_acl_head *a,
 /**
  * tomoyo_write_group - Write "struct tomoyo_path_group"/"struct tomoyo_number_group" list.
  *
- * @data:      String to parse.
- * @is_delete: True if it is a delete request.
- * @type:      Type of this group.
+ * @param: Pointer to "struct tomoyo_acl_param".
+ * @type:  Type of this group.
  *
  * Returns 0 on success, negative value otherwise.
  */
-int tomoyo_write_group(char *data, const bool is_delete, const u8 type)
+int tomoyo_write_group(struct tomoyo_acl_param *param, const u8 type)
 {
-       struct tomoyo_group *group;
-       struct list_head *member;
-       char *w[2];
+       struct tomoyo_group *group = tomoyo_get_group(param, type);
        int error = -EINVAL;
-       if (!tomoyo_tokenize(data, w, sizeof(w)) || !w[1][0])
-               return -EINVAL;
-       group = tomoyo_get_group(w[0], type);
        if (!group)
                return -ENOMEM;
-       member = &group->member_list;
+       param->list = &group->member_list;
        if (type == TOMOYO_PATH_GROUP) {
                struct tomoyo_path_group e = { };
-               e.member_name = tomoyo_get_name(w[1]);
+               e.member_name = tomoyo_get_name(tomoyo_read_token(param));
                if (!e.member_name) {
                        error = -ENOMEM;
                        goto out;
                }
-               error = tomoyo_update_policy(&e.head, sizeof(e), is_delete,
-                                            member, tomoyo_same_path_group);
+               error = tomoyo_update_policy(&e.head, sizeof(e), param,
+                                         tomoyo_same_path_group);
                tomoyo_put_name(e.member_name);
        } else if (type == TOMOYO_NUMBER_GROUP) {
                struct tomoyo_number_group e = { };
-               if (w[1][0] == '@'
-                   || !tomoyo_parse_number_union(w[1], &e.number)
-                   || e.number.values[0] > e.number.values[1])
+               if (param->data[0] == '@' ||
+                   !tomoyo_parse_number_union(param, &e.number))
                        goto out;
-               error = tomoyo_update_policy(&e.head, sizeof(e), is_delete,
-                                            member, tomoyo_same_number_group);
+               error = tomoyo_update_policy(&e.head, sizeof(e), param,
+                                         tomoyo_same_number_group);
                /*
                 * tomoyo_put_number_union() is not needed because
-                * w[1][0] != '@'.
+                * param->data[0] != '@'.
                 */
        }
- out:
+out:
        tomoyo_put_group(group);
        return error;
 }
@@ -77,8 +86,8 @@ int tomoyo_write_group(char *data, const bool is_delete, const u8 type)
 /**
  * tomoyo_path_matches_group - Check whether the given pathname matches members of the given pathname group.
  *
- * @pathname:        The name of pathname.
- * @group:           Pointer to "struct tomoyo_path_group".
+ * @pathname: The name of pathname.
+ * @group:    Pointer to "struct tomoyo_path_group".
  *
  * Returns matched member's pathname if @pathname matches pathnames in @group,
  * NULL otherwise.
index 3312e56..6797540 100644 (file)
@@ -1,15 +1,32 @@
 /*
  * security/tomoyo/load_policy.c
  *
- * Policy loader launcher for TOMOYO.
- *
- * Copyright (C) 2005-2010  NTT DATA CORPORATION
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
  */
 
 #include "common.h"
 
-/* path to policy loader */
-static const char *tomoyo_loader = "/sbin/tomoyo-init";
+#ifndef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER
+
+/*
+ * Path to the policy loader. (default = CONFIG_SECURITY_TOMOYO_POLICY_LOADER)
+ */
+static const char *tomoyo_loader;
+
+/**
+ * tomoyo_loader_setup - Set policy loader.
+ *
+ * @str: Program to use as a policy loader (e.g. /sbin/tomoyo-init ).
+ *
+ * Returns 0.
+ */
+static int __init tomoyo_loader_setup(char *str)
+{
+       tomoyo_loader = str;
+       return 0;
+}
+
+__setup("TOMOYO_loader=", tomoyo_loader_setup);
 
 /**
  * tomoyo_policy_loader_exists - Check whether /sbin/tomoyo-init exists.
@@ -18,24 +35,38 @@ static const char *tomoyo_loader = "/sbin/tomoyo-init";
  */
 static bool tomoyo_policy_loader_exists(void)
 {
-       /*
-        * Don't activate MAC if the policy loader doesn't exist.
-        * If the initrd includes /sbin/init but real-root-dev has not
-        * mounted on / yet, activating MAC will block the system since
-        * policies are not loaded yet.
-        * Thus, let do_execve() call this function every time.
-        */
        struct path path;
-
+       if (!tomoyo_loader)
+               tomoyo_loader = CONFIG_SECURITY_TOMOYO_POLICY_LOADER;
        if (kern_path(tomoyo_loader, LOOKUP_FOLLOW, &path)) {
-               printk(KERN_INFO "Not activating Mandatory Access Control now "
-                      "since %s doesn't exist.\n", tomoyo_loader);
+               printk(KERN_INFO "Not activating Mandatory Access Control "
+                      "as %s does not exist.\n", tomoyo_loader);
                return false;
        }
        path_put(&path);
        return true;
 }
 
+/*
+ * Path to the trigger. (default = CONFIG_SECURITY_TOMOYO_ACTIVATION_TRIGGER)
+ */
+static const char *tomoyo_trigger;
+
+/**
+ * tomoyo_trigger_setup - Set trigger for activation.
+ *
+ * @str: Program to use as an activation trigger (e.g. /sbin/init ).
+ *
+ * Returns 0.
+ */
+static int __init tomoyo_trigger_setup(char *str)
+{
+       tomoyo_trigger = str;
+       return 0;
+}
+
+__setup("TOMOYO_trigger=", tomoyo_trigger_setup);
+
 /**
  * tomoyo_load_policy - Run external policy loader to load policy.
  *
@@ -51,24 +82,19 @@ static bool tomoyo_policy_loader_exists(void)
  */
 void tomoyo_load_policy(const char *filename)
 {
+       static bool done;
        char *argv[2];
        char *envp[3];
 
-       if (tomoyo_policy_loaded)
+       if (tomoyo_policy_loaded || done)
                return;
-       /*
-        * Check filename is /sbin/init or /sbin/tomoyo-start.
-        * /sbin/tomoyo-start is a dummy filename in case where /sbin/init can't
-        * be passed.
-        * You can create /sbin/tomoyo-start by
-        * "ln -s /bin/true /sbin/tomoyo-start".
-        */
-       if (strcmp(filename, "/sbin/init") &&
-           strcmp(filename, "/sbin/tomoyo-start"))
+       if (!tomoyo_trigger)
+               tomoyo_trigger = CONFIG_SECURITY_TOMOYO_ACTIVATION_TRIGGER;
+       if (strcmp(filename, tomoyo_trigger))
                return;
        if (!tomoyo_policy_loader_exists())
                return;
-
+       done = true;
        printk(KERN_INFO "Calling %s to load policy. Please wait.\n",
               tomoyo_loader);
        argv[0] = (char *) tomoyo_loader;
@@ -79,3 +105,5 @@ void tomoyo_load_policy(const char *filename)
        call_usermodehelper(argv[0], argv, envp, 1);
        tomoyo_check_profile();
 }
+
+#endif
index 42a7b1b..7a56051 100644 (file)
@@ -1,9 +1,7 @@
 /*
  * security/tomoyo/memory.c
  *
- * Memory management functions for TOMOYO.
- *
- * Copyright (C) 2005-2010  NTT DATA CORPORATION
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
  */
 
 #include <linux/hash.h>
@@ -29,10 +27,12 @@ void tomoyo_warn_oom(const char *function)
                panic("MAC Initialization failed.\n");
 }
 
-/* Memory allocated for policy. */
-static atomic_t tomoyo_policy_memory_size;
-/* Quota for holding policy. */
-static unsigned int tomoyo_quota_for_policy;
+/* Lock for protecting tomoyo_memory_used. */
+static DEFINE_SPINLOCK(tomoyo_policy_memory_lock);
+/* Memory currently used by policy/audit log/query. */
+unsigned int tomoyo_memory_used[TOMOYO_MAX_MEMORY_STAT];
+/* Memory quota for "policy"/"audit log"/"query". */
+unsigned int tomoyo_memory_quota[TOMOYO_MAX_MEMORY_STAT];
 
 /**
  * tomoyo_memory_ok - Check memory quota.
@@ -45,15 +45,20 @@ static unsigned int tomoyo_quota_for_policy;
  */
 bool tomoyo_memory_ok(void *ptr)
 {
-       size_t s = ptr ? ksize(ptr) : 0;
-       atomic_add(s, &tomoyo_policy_memory_size);
-       if (ptr && (!tomoyo_quota_for_policy ||
-                   atomic_read(&tomoyo_policy_memory_size)
-                   <= tomoyo_quota_for_policy)) {
-               memset(ptr, 0, s);
-               return true;
+       if (ptr) {
+               const size_t s = ksize(ptr);
+               bool result;
+               spin_lock(&tomoyo_policy_memory_lock);
+               tomoyo_memory_used[TOMOYO_MEMORY_POLICY] += s;
+               result = !tomoyo_memory_quota[TOMOYO_MEMORY_POLICY] ||
+                       tomoyo_memory_used[TOMOYO_MEMORY_POLICY] <=
+                       tomoyo_memory_quota[TOMOYO_MEMORY_POLICY];
+               if (!result)
+                       tomoyo_memory_used[TOMOYO_MEMORY_POLICY] -= s;
+               spin_unlock(&tomoyo_policy_memory_lock);
+               if (result)
+                       return true;
        }
-       atomic_sub(s, &tomoyo_policy_memory_size);
        tomoyo_warn_oom(__func__);
        return false;
 }
@@ -86,22 +91,28 @@ void *tomoyo_commit_ok(void *data, const unsigned int size)
  */
 void tomoyo_memory_free(void *ptr)
 {
-       atomic_sub(ksize(ptr), &tomoyo_policy_memory_size);
+       size_t s = ksize(ptr);
+       spin_lock(&tomoyo_policy_memory_lock);
+       tomoyo_memory_used[TOMOYO_MEMORY_POLICY] -= s;
+       spin_unlock(&tomoyo_policy_memory_lock);
        kfree(ptr);
 }
 
 /**
  * tomoyo_get_group - Allocate memory for "struct tomoyo_path_group"/"struct tomoyo_number_group".
  *
- * @group_name: The name of address group.
- * @idx:        Index number.
+ * @param: Pointer to "struct tomoyo_acl_param".
+ * @idx:   Index number.
  *
  * Returns pointer to "struct tomoyo_group" on success, NULL otherwise.
  */
-struct tomoyo_group *tomoyo_get_group(const char *group_name, const u8 idx)
+struct tomoyo_group *tomoyo_get_group(struct tomoyo_acl_param *param,
+                                     const u8 idx)
 {
        struct tomoyo_group e = { };
        struct tomoyo_group *group = NULL;
+       struct list_head *list;
+       const char *group_name = tomoyo_read_token(param);
        bool found = false;
        if (!tomoyo_correct_word(group_name) || idx >= TOMOYO_MAX_GROUP)
                return NULL;
@@ -110,10 +121,11 @@ struct tomoyo_group *tomoyo_get_group(const char *group_name, const u8 idx)
                return NULL;
        if (mutex_lock_interruptible(&tomoyo_policy_lock))
                goto out;
-       list_for_each_entry(group, &tomoyo_group_list[idx], list) {
+       list = &param->ns->group_list[idx];
+       list_for_each_entry(group, list, head.list) {
                if (e.group_name != group->group_name)
                        continue;
-               atomic_inc(&group->users);
+               atomic_inc(&group->head.users);
                found = true;
                break;
        }
@@ -121,15 +133,14 @@ struct tomoyo_group *tomoyo_get_group(const char *group_name, const u8 idx)
                struct tomoyo_group *entry = tomoyo_commit_ok(&e, sizeof(e));
                if (entry) {
                        INIT_LIST_HEAD(&entry->member_list);
-                       atomic_set(&entry->users, 1);
-                       list_add_tail_rcu(&entry->list,
-                                         &tomoyo_group_list[idx]);
+                       atomic_set(&entry->head.users, 1);
+                       list_add_tail_rcu(&entry->head.list, list);
                        group = entry;
                        found = true;
                }
        }
        mutex_unlock(&tomoyo_policy_lock);
- out:
+out:
        tomoyo_put_name(e.group_name);
        return found ? group : NULL;
 }
@@ -154,7 +165,6 @@ const struct tomoyo_path_info *tomoyo_get_name(const char *name)
        struct tomoyo_name *ptr;
        unsigned int hash;
        int len;
-       int allocated_len;
        struct list_head *head;
 
        if (!name)
@@ -164,120 +174,43 @@ const struct tomoyo_path_info *tomoyo_get_name(const char *name)
        head = &tomoyo_name_list[hash_long(hash, TOMOYO_HASH_BITS)];
        if (mutex_lock_interruptible(&tomoyo_policy_lock))
                return NULL;
-       list_for_each_entry(ptr, head, list) {
+       list_for_each_entry(ptr, head, head.list) {
                if (hash != ptr->entry.hash || strcmp(name, ptr->entry.name))
                        continue;
-               atomic_inc(&ptr->users);
+               atomic_inc(&ptr->head.users);
                goto out;
        }
        ptr = kzalloc(sizeof(*ptr) + len, GFP_NOFS);
-       allocated_len = ptr ? ksize(ptr) : 0;
-       if (!ptr || (tomoyo_quota_for_policy &&
-                    atomic_read(&tomoyo_policy_memory_size) + allocated_len
-                    > tomoyo_quota_for_policy)) {
+       if (tomoyo_memory_ok(ptr)) {
+               ptr->entry.name = ((char *) ptr) + sizeof(*ptr);
+               memmove((char *) ptr->entry.name, name, len);
+               atomic_set(&ptr->head.users, 1);
+               tomoyo_fill_path_info(&ptr->entry);
+               list_add_tail(&ptr->head.list, head);
+       } else {
                kfree(ptr);
                ptr = NULL;
-               tomoyo_warn_oom(__func__);
-               goto out;
        }
-       atomic_add(allocated_len, &tomoyo_policy_memory_size);
-       ptr->entry.name = ((char *) ptr) + sizeof(*ptr);
-       memmove((char *) ptr->entry.name, name, len);
-       atomic_set(&ptr->users, 1);
-       tomoyo_fill_path_info(&ptr->entry);
-       list_add_tail(&ptr->list, head);
- out:
+out:
        mutex_unlock(&tomoyo_policy_lock);
        return ptr ? &ptr->entry : NULL;
 }
 
+/* Initial namespace. */
+struct tomoyo_policy_namespace tomoyo_kernel_namespace;
+
 /**
  * tomoyo_mm_init - Initialize mm related code.
  */
 void __init tomoyo_mm_init(void)
 {
        int idx;
-
-       for (idx = 0; idx < TOMOYO_MAX_POLICY; idx++)
-               INIT_LIST_HEAD(&tomoyo_policy_list[idx]);
-       for (idx = 0; idx < TOMOYO_MAX_GROUP; idx++)
-               INIT_LIST_HEAD(&tomoyo_group_list[idx]);
        for (idx = 0; idx < TOMOYO_MAX_HASH; idx++)
                INIT_LIST_HEAD(&tomoyo_name_list[idx]);
+       tomoyo_kernel_namespace.name = "<kernel>";
+       tomoyo_init_policy_namespace(&tomoyo_kernel_namespace);
+       tomoyo_kernel_domain.ns = &tomoyo_kernel_namespace;
        INIT_LIST_HEAD(&tomoyo_kernel_domain.acl_info_list);
-       tomoyo_kernel_domain.domainname = tomoyo_get_name(TOMOYO_ROOT_NAME);
+       tomoyo_kernel_domain.domainname = tomoyo_get_name("<kernel>");
        list_add_tail_rcu(&tomoyo_kernel_domain.list, &tomoyo_domain_list);
-       idx = tomoyo_read_lock();
-       if (tomoyo_find_domain(TOMOYO_ROOT_NAME) != &tomoyo_kernel_domain)
-               panic("Can't register tomoyo_kernel_domain");
-       {
-               /* Load built-in policy. */
-               tomoyo_write_transition_control("/sbin/hotplug", false,
-                                       TOMOYO_TRANSITION_CONTROL_INITIALIZE);
-               tomoyo_write_transition_control("/sbin/modprobe", false,
-                                       TOMOYO_TRANSITION_CONTROL_INITIALIZE);
-       }
-       tomoyo_read_unlock(idx);
-}
-
-
-/* Memory allocated for query lists. */
-unsigned int tomoyo_query_memory_size;
-/* Quota for holding query lists. */
-unsigned int tomoyo_quota_for_query;
-
-/**
- * tomoyo_read_memory_counter - Check for memory usage in bytes.
- *
- * @head: Pointer to "struct tomoyo_io_buffer".
- *
- * Returns memory usage.
- */
-void tomoyo_read_memory_counter(struct tomoyo_io_buffer *head)
-{
-       if (!head->r.eof) {
-               const unsigned int policy
-                       = atomic_read(&tomoyo_policy_memory_size);
-               const unsigned int query = tomoyo_query_memory_size;
-               char buffer[64];
-
-               memset(buffer, 0, sizeof(buffer));
-               if (tomoyo_quota_for_policy)
-                       snprintf(buffer, sizeof(buffer) - 1,
-                                "   (Quota: %10u)",
-                                tomoyo_quota_for_policy);
-               else
-                       buffer[0] = '\0';
-               tomoyo_io_printf(head, "Policy:       %10u%s\n", policy,
-                                buffer);
-               if (tomoyo_quota_for_query)
-                       snprintf(buffer, sizeof(buffer) - 1,
-                                "   (Quota: %10u)",
-                                tomoyo_quota_for_query);
-               else
-                       buffer[0] = '\0';
-               tomoyo_io_printf(head, "Query lists:  %10u%s\n", query,
-                                buffer);
-               tomoyo_io_printf(head, "Total:        %10u\n", policy + query);
-               head->r.eof = true;
-       }
-}
-
-/**
- * tomoyo_write_memory_quota - Set memory quota.
- *
- * @head: Pointer to "struct tomoyo_io_buffer".
- *
- * Returns 0.
- */
-int tomoyo_write_memory_quota(struct tomoyo_io_buffer *head)
-{
-       char *data = head->write_buf;
-       unsigned int size;
-
-       if (sscanf(data, "Policy: %u", &size) == 1)
-               tomoyo_quota_for_policy = size;
-       else if (sscanf(data, "Query lists: %u", &size) == 1)
-               tomoyo_quota_for_query = size;
-       return 0;
 }
index 9fc2e15..bee09d0 100644 (file)
@@ -1,28 +1,22 @@
 /*
  * security/tomoyo/mount.c
  *
- * Copyright (C) 2005-2010  NTT DATA CORPORATION
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
  */
 
 #include <linux/slab.h>
 #include "common.h"
 
-/* Keywords for mount restrictions. */
-
-/* Allow to call 'mount --bind /source_dir /dest_dir' */
-#define TOMOYO_MOUNT_BIND_KEYWORD                        "--bind"
-/* Allow to call 'mount --move /old_dir    /new_dir ' */
-#define TOMOYO_MOUNT_MOVE_KEYWORD                        "--move"
-/* Allow to call 'mount -o remount /dir             ' */
-#define TOMOYO_MOUNT_REMOUNT_KEYWORD                     "--remount"
-/* Allow to call 'mount --make-unbindable /dir'       */
-#define TOMOYO_MOUNT_MAKE_UNBINDABLE_KEYWORD             "--make-unbindable"
-/* Allow to call 'mount --make-private /dir'          */
-#define TOMOYO_MOUNT_MAKE_PRIVATE_KEYWORD                "--make-private"
-/* Allow to call 'mount --make-slave /dir'            */
-#define TOMOYO_MOUNT_MAKE_SLAVE_KEYWORD                  "--make-slave"
-/* Allow to call 'mount --make-shared /dir'           */
-#define TOMOYO_MOUNT_MAKE_SHARED_KEYWORD                 "--make-shared"
+/* String table for special mount operations. */
+static const char * const tomoyo_mounts[TOMOYO_MAX_SPECIAL_MOUNT] = {
+       [TOMOYO_MOUNT_BIND]            = "--bind",
+       [TOMOYO_MOUNT_MOVE]            = "--move",
+       [TOMOYO_MOUNT_REMOUNT]         = "--remount",
+       [TOMOYO_MOUNT_MAKE_UNBINDABLE] = "--make-unbindable",
+       [TOMOYO_MOUNT_MAKE_PRIVATE]    = "--make-private",
+       [TOMOYO_MOUNT_MAKE_SLAVE]      = "--make-slave",
+       [TOMOYO_MOUNT_MAKE_SHARED]     = "--make-shared",
+};
 
 /**
  * tomoyo_audit_mount_log - Audit mount log.
  */
 static int tomoyo_audit_mount_log(struct tomoyo_request_info *r)
 {
-       const char *dev = r->param.mount.dev->name;
-       const char *dir = r->param.mount.dir->name;
-       const char *type = r->param.mount.type->name;
-       const unsigned long flags = r->param.mount.flags;
-       if (r->granted)
-               return 0;
-       if (!strcmp(type, TOMOYO_MOUNT_REMOUNT_KEYWORD))
-               tomoyo_warn_log(r, "mount -o remount %s 0x%lX", dir, flags);
-       else if (!strcmp(type, TOMOYO_MOUNT_BIND_KEYWORD)
-                || !strcmp(type, TOMOYO_MOUNT_MOVE_KEYWORD))
-               tomoyo_warn_log(r, "mount %s %s %s 0x%lX", type, dev, dir,
-                               flags);
-       else if (!strcmp(type, TOMOYO_MOUNT_MAKE_UNBINDABLE_KEYWORD) ||
-                !strcmp(type, TOMOYO_MOUNT_MAKE_PRIVATE_KEYWORD) ||
-                !strcmp(type, TOMOYO_MOUNT_MAKE_SLAVE_KEYWORD) ||
-                !strcmp(type, TOMOYO_MOUNT_MAKE_SHARED_KEYWORD))
-               tomoyo_warn_log(r, "mount %s %s 0x%lX", type, dir, flags);
-       else
-               tomoyo_warn_log(r, "mount -t %s %s %s 0x%lX", type, dev, dir,
-                               flags);
-       return tomoyo_supervisor(r,
-                                TOMOYO_KEYWORD_ALLOW_MOUNT "%s %s %s 0x%lX\n",
-                                tomoyo_pattern(r->param.mount.dev),
-                                tomoyo_pattern(r->param.mount.dir), type,
-                                flags);
+       return tomoyo_supervisor(r, "file mount %s %s %s 0x%lX\n",
+                                r->param.mount.dev->name,
+                                r->param.mount.dir->name,
+                                r->param.mount.type->name,
+                                r->param.mount.flags);
 }
 
+/**
+ * tomoyo_check_mount_acl - Check whether the request matches a "struct tomoyo_mount_acl" entry.
+ *
+ * @r:   Pointer to "struct tomoyo_request_info".
+ * @ptr: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if granted, false otherwise.
+ */
 static bool tomoyo_check_mount_acl(struct tomoyo_request_info *r,
                                   const struct tomoyo_acl_info *ptr)
 {
        const struct tomoyo_mount_acl *acl =
                container_of(ptr, typeof(*acl), head);
-       return tomoyo_compare_number_union(r->param.mount.flags, &acl->flags) &&
-               tomoyo_compare_name_union(r->param.mount.type, &acl->fs_type) &&
-               tomoyo_compare_name_union(r->param.mount.dir, &acl->dir_name) &&
+       return tomoyo_compare_number_union(r->param.mount.flags,
+                                          &acl->flags) &&
+               tomoyo_compare_name_union(r->param.mount.type,
+                                         &acl->fs_type) &&
+               tomoyo_compare_name_union(r->param.mount.dir,
+                                         &acl->dir_name) &&
                (!r->param.mount.need_dev ||
-                tomoyo_compare_name_union(r->param.mount.dev, &acl->dev_name));
+                tomoyo_compare_name_union(r->param.mount.dev,
+                                          &acl->dev_name));
 }
 
 /**
  * tomoyo_mount_acl - Check permission for mount() operation.
  *
  * @r:        Pointer to "struct tomoyo_request_info".
- * @dev_name: Name of device file.
+ * @dev_name: Name of device file. May be NULL.
  * @dir:      Pointer to "struct path".
  * @type:     Name of filesystem type.
  * @flags:    Mount options.
@@ -86,8 +72,10 @@ static bool tomoyo_check_mount_acl(struct tomoyo_request_info *r,
  * Caller holds tomoyo_read_lock().
  */
 static int tomoyo_mount_acl(struct tomoyo_request_info *r, char *dev_name,
-                           struct path *dir, char *type, unsigned long flags)
+                           struct path *dir, const char *type,
+                           unsigned long flags)
 {
+       struct tomoyo_obj_info obj = { };
        struct path path;
        struct file_system_type *fstype = NULL;
        const char *requested_type = NULL;
@@ -98,6 +86,7 @@ static int tomoyo_mount_acl(struct tomoyo_request_info *r, char *dev_name,
        struct tomoyo_path_info rdir;
        int need_dev = 0;
        int error = -ENOMEM;
+       r->obj = &obj;
 
        /* Get fstype. */
        requested_type = tomoyo_encode(type);
@@ -107,6 +96,7 @@ static int tomoyo_mount_acl(struct tomoyo_request_info *r, char *dev_name,
        tomoyo_fill_path_info(&rtype);
 
        /* Get mount point. */
+       obj.path2 = *dir;
        requested_dir_name = tomoyo_realpath_from_path(dir);
        if (!requested_dir_name) {
                error = -ENOMEM;
@@ -116,15 +106,15 @@ static int tomoyo_mount_acl(struct tomoyo_request_info *r, char *dev_name,
        tomoyo_fill_path_info(&rdir);
 
        /* Compare fs name. */
-       if (!strcmp(type, TOMOYO_MOUNT_REMOUNT_KEYWORD)) {
+       if (type == tomoyo_mounts[TOMOYO_MOUNT_REMOUNT]) {
                /* dev_name is ignored. */
-       } else if (!strcmp(type, TOMOYO_MOUNT_MAKE_UNBINDABLE_KEYWORD) ||
-                  !strcmp(type, TOMOYO_MOUNT_MAKE_PRIVATE_KEYWORD) ||
-                  !strcmp(type, TOMOYO_MOUNT_MAKE_SLAVE_KEYWORD) ||
-                  !strcmp(type, TOMOYO_MOUNT_MAKE_SHARED_KEYWORD)) {
+       } else if (type == tomoyo_mounts[TOMOYO_MOUNT_MAKE_UNBINDABLE] ||
+                  type == tomoyo_mounts[TOMOYO_MOUNT_MAKE_PRIVATE] ||
+                  type == tomoyo_mounts[TOMOYO_MOUNT_MAKE_SLAVE] ||
+                  type == tomoyo_mounts[TOMOYO_MOUNT_MAKE_SHARED]) {
                /* dev_name is ignored. */
-       } else if (!strcmp(type, TOMOYO_MOUNT_BIND_KEYWORD) ||
-                  !strcmp(type, TOMOYO_MOUNT_MOVE_KEYWORD)) {
+       } else if (type == tomoyo_mounts[TOMOYO_MOUNT_BIND] ||
+                  type == tomoyo_mounts[TOMOYO_MOUNT_MOVE]) {
                need_dev = -1; /* dev_name is a directory */
        } else {
                fstype = get_fs_type(type);
@@ -142,8 +132,8 @@ static int tomoyo_mount_acl(struct tomoyo_request_info *r, char *dev_name,
                        error = -ENOENT;
                        goto out;
                }
+               obj.path1 = path;
                requested_dev_name = tomoyo_realpath_from_path(&path);
-               path_put(&path);
                if (!requested_dev_name) {
                        error = -ENOENT;
                        goto out;
@@ -176,22 +166,26 @@ static int tomoyo_mount_acl(struct tomoyo_request_info *r, char *dev_name,
        if (fstype)
                put_filesystem(fstype);
        kfree(requested_type);
+       /* Drop refcount obtained by kern_path(). */
+       if (obj.path1.dentry)
+               path_put(&obj.path1);
        return error;
 }
 
 /**
  * tomoyo_mount_permission - Check permission for mount() operation.
  *
- * @dev_name:  Name of device file.
+ * @dev_name:  Name of device file. May be NULL.
  * @path:      Pointer to "struct path".
- * @type:      Name of filesystem type. May be NULL.
+ * @type:      Name of filesystem type. May be NULL.
  * @flags:     Mount options.
- * @data_page: Optional data. May be NULL.
+ * @data_page: Optional data. May be NULL.
  *
  * Returns 0 on success, negative value otherwise.
  */
-int tomoyo_mount_permission(char *dev_name, struct path *path, char *type,
-                           unsigned long flags, void *data_page)
+int tomoyo_mount_permission(char *dev_name, struct path *path,
+                           const char *type, unsigned long flags,
+                           void *data_page)
 {
        struct tomoyo_request_info r;
        int error;
@@ -203,31 +197,31 @@ int tomoyo_mount_permission(char *dev_name, struct path *path, char *type,
        if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
                flags &= ~MS_MGC_MSK;
        if (flags & MS_REMOUNT) {
-               type = TOMOYO_MOUNT_REMOUNT_KEYWORD;
+               type = tomoyo_mounts[TOMOYO_MOUNT_REMOUNT];
                flags &= ~MS_REMOUNT;
        }
        if (flags & MS_MOVE) {
-               type = TOMOYO_MOUNT_MOVE_KEYWORD;
+               type = tomoyo_mounts[TOMOYO_MOUNT_MOVE];
                flags &= ~MS_MOVE;
        }
        if (flags & MS_BIND) {
-               type = TOMOYO_MOUNT_BIND_KEYWORD;
+               type = tomoyo_mounts[TOMOYO_MOUNT_BIND];
                flags &= ~MS_BIND;
        }
        if (flags & MS_UNBINDABLE) {
-               type = TOMOYO_MOUNT_MAKE_UNBINDABLE_KEYWORD;
+               type = tomoyo_mounts[TOMOYO_MOUNT_MAKE_UNBINDABLE];
                flags &= ~MS_UNBINDABLE;
        }
        if (flags & MS_PRIVATE) {
-               type = TOMOYO_MOUNT_MAKE_PRIVATE_KEYWORD;
+               type = tomoyo_mounts[TOMOYO_MOUNT_MAKE_PRIVATE];
                flags &= ~MS_PRIVATE;
        }
        if (flags & MS_SLAVE) {
-               type = TOMOYO_MOUNT_MAKE_SLAVE_KEYWORD;
+               type = tomoyo_mounts[TOMOYO_MOUNT_MAKE_SLAVE];
                flags &= ~MS_SLAVE;
        }
        if (flags & MS_SHARED) {
-               type = TOMOYO_MOUNT_MAKE_SHARED_KEYWORD;
+               type = tomoyo_mounts[TOMOYO_MOUNT_MAKE_SHARED];
                flags &= ~MS_SHARED;
        }
        if (!type)
@@ -237,49 +231,3 @@ int tomoyo_mount_permission(char *dev_name, struct path *path, char *type,
        tomoyo_read_unlock(idx);
        return error;
 }
-
-static bool tomoyo_same_mount_acl(const struct tomoyo_acl_info *a,
-                                 const struct tomoyo_acl_info *b)
-{
-       const struct tomoyo_mount_acl *p1 = container_of(a, typeof(*p1), head);
-       const struct tomoyo_mount_acl *p2 = container_of(b, typeof(*p2), head);
-       return tomoyo_same_acl_head(&p1->head, &p2->head) &&
-               tomoyo_same_name_union(&p1->dev_name, &p2->dev_name) &&
-               tomoyo_same_name_union(&p1->dir_name, &p2->dir_name) &&
-               tomoyo_same_name_union(&p1->fs_type, &p2->fs_type) &&
-               tomoyo_same_number_union(&p1->flags, &p2->flags);
-}
-
-/**
- * tomoyo_write_mount - Write "struct tomoyo_mount_acl" list.
- *
- * @data:      String to parse.
- * @domain:    Pointer to "struct tomoyo_domain_info".
- * @is_delete: True if it is a delete request.
- *
- * Returns 0 on success, negative value otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-int tomoyo_write_mount(char *data, struct tomoyo_domain_info *domain,
-                      const bool is_delete)
-{
-       struct tomoyo_mount_acl e = { .head.type = TOMOYO_TYPE_MOUNT_ACL };
-       int error = is_delete ? -ENOENT : -ENOMEM;
-       char *w[4];
-       if (!tomoyo_tokenize(data, w, sizeof(w)) || !w[3][0])
-               return -EINVAL;
-       if (!tomoyo_parse_name_union(w[0], &e.dev_name) ||
-           !tomoyo_parse_name_union(w[1], &e.dir_name) ||
-           !tomoyo_parse_name_union(w[2], &e.fs_type) ||
-           !tomoyo_parse_number_union(w[3], &e.flags))
-               goto out;
-       error = tomoyo_update_domain(&e.head, sizeof(e), is_delete, domain,
-                                    tomoyo_same_mount_acl, NULL);
- out:
-       tomoyo_put_name_union(&e.dev_name);
-       tomoyo_put_name_union(&e.dir_name);
-       tomoyo_put_name_union(&e.fs_type);
-       tomoyo_put_number_union(&e.flags);
-       return error;
-}
index 8d95e91..6c601bd 100644 (file)
@@ -1,9 +1,7 @@
 /*
  * security/tomoyo/realpath.c
  *
- * Pathname calculation functions for TOMOYO.
- *
- * Copyright (C) 2005-2010  NTT DATA CORPORATION
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
  */
 
 #include <linux/types.h>
@@ -70,6 +68,161 @@ char *tomoyo_encode(const char *str)
 }
 
 /**
+ * tomoyo_get_absolute_path - Get the path of a dentry, ignoring chroot'ed root.
+ *
+ * @path:   Pointer to "struct path".
+ * @buffer: Pointer to buffer to return value in.
+ * @buflen: Size of @buffer.
+ *
+ * Returns the buffer on success, an error code otherwise.
+ *
+ * If dentry is a directory, trailing '/' is appended.
+ */
+static char *tomoyo_get_absolute_path(struct path *path, char * const buffer,
+                                     const int buflen)
+{
+       char *pos = ERR_PTR(-ENOMEM);
+       if (buflen >= 256) {
+               struct path ns_root = { };
+               /* go to whatever namespace root we are under */
+               pos = __d_path(path, &ns_root, buffer, buflen - 1);
+               if (!IS_ERR(pos) && *pos == '/' && pos[1]) {
+                       struct inode *inode = path->dentry->d_inode;
+                       if (inode && S_ISDIR(inode->i_mode)) {
+                               buffer[buflen - 2] = '/';
+                               buffer[buflen - 1] = '\0';
+                       }
+               }
+       }
+       return pos;
+}
+
+/**
+ * tomoyo_get_dentry_path - Get the path of a dentry.
+ *
+ * @dentry: Pointer to "struct dentry".
+ * @buffer: Pointer to buffer to return value in.
+ * @buflen: Size of @buffer.
+ *
+ * Returns the buffer on success, an error code otherwise.
+ *
+ * If dentry is a directory, trailing '/' is appended.
+ */
+static char *tomoyo_get_dentry_path(struct dentry *dentry, char * const buffer,
+                                   const int buflen)
+{
+       char *pos = ERR_PTR(-ENOMEM);
+       if (buflen >= 256) {
+               pos = dentry_path_raw(dentry, buffer, buflen - 1);
+               if (!IS_ERR(pos) && *pos == '/' && pos[1]) {
+                       struct inode *inode = dentry->d_inode;
+                       if (inode && S_ISDIR(inode->i_mode)) {
+                               buffer[buflen - 2] = '/';
+                               buffer[buflen - 1] = '\0';
+                       }
+               }
+       }
+       return pos;
+}
+
+/**
+ * tomoyo_get_local_path - Get the path of a dentry.
+ *
+ * @dentry: Pointer to "struct dentry".
+ * @buffer: Pointer to buffer to return value in.
+ * @buflen: Size of @buffer.
+ *
+ * Returns the buffer on success, an error code otherwise.
+ */
+static char *tomoyo_get_local_path(struct dentry *dentry, char * const buffer,
+                                  const int buflen)
+{
+       struct super_block *sb = dentry->d_sb;
+       char *pos = tomoyo_get_dentry_path(dentry, buffer, buflen);
+       if (IS_ERR(pos))
+               return pos;
+       /* Convert from $PID to self if $PID is current thread. */
+       if (sb->s_magic == PROC_SUPER_MAGIC && *pos == '/') {
+               char *ep;
+               const pid_t pid = (pid_t) simple_strtoul(pos + 1, &ep, 10);
+               if (*ep == '/' && pid && pid ==
+                   task_tgid_nr_ns(current, sb->s_fs_info)) {
+                       pos = ep - 5;
+                       if (pos < buffer)
+                               goto out;
+                       memmove(pos, "/self", 5);
+               }
+               goto prepend_filesystem_name;
+       }
+       /* Use filesystem name for unnamed devices. */
+       if (!MAJOR(sb->s_dev))
+               goto prepend_filesystem_name;
+       {
+               struct inode *inode = sb->s_root->d_inode;
+               /*
+                * Use filesystem name if filesystem does not support rename()
+                * operation.
+                */
+               if (inode->i_op && !inode->i_op->rename)
+                       goto prepend_filesystem_name;
+       }
+       /* Prepend device name. */
+       {
+               char name[64];
+               int name_len;
+               const dev_t dev = sb->s_dev;
+               name[sizeof(name) - 1] = '\0';
+               snprintf(name, sizeof(name) - 1, "dev(%u,%u):", MAJOR(dev),
+                        MINOR(dev));
+               name_len = strlen(name);
+               pos -= name_len;
+               if (pos < buffer)
+                       goto out;
+               memmove(pos, name, name_len);
+               return pos;
+       }
+       /* Prepend filesystem name. */
+prepend_filesystem_name:
+       {
+               const char *name = sb->s_type->name;
+               const int name_len = strlen(name);
+               pos -= name_len + 1;
+               if (pos < buffer)
+                       goto out;
+               memmove(pos, name, name_len);
+               pos[name_len] = ':';
+       }
+       return pos;
+out:
+       return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * tomoyo_get_socket_name - Get the name of a socket.
+ *
+ * @path:   Pointer to "struct path".
+ * @buffer: Pointer to buffer to return value in.
+ * @buflen: Size of @buffer.
+ *
+ * Returns the buffer.
+ */
+static char *tomoyo_get_socket_name(struct path *path, char * const buffer,
+                                   const int buflen)
+{
+       struct inode *inode = path->dentry->d_inode;
+       struct socket *sock = inode ? SOCKET_I(inode) : NULL;
+       struct sock *sk = sock ? sock->sk : NULL;
+       if (sk) {
+               snprintf(buffer, buflen, "socket:[family=%u:type=%u:"
+                        "protocol=%u]", sk->sk_family, sk->sk_type,
+                        sk->sk_protocol);
+       } else {
+               snprintf(buffer, buflen, "socket:[unknown]");
+       }
+       return buffer;
+}
+
+/**
  * tomoyo_realpath_from_path - Returns realpath(3) of the given pathname but ignores chroot'ed root.
  *
  * @path: Pointer to "struct path".
@@ -90,55 +243,42 @@ char *tomoyo_realpath_from_path(struct path *path)
        char *name = NULL;
        unsigned int buf_len = PAGE_SIZE / 2;
        struct dentry *dentry = path->dentry;
-       bool is_dir;
+       struct super_block *sb;
        if (!dentry)
                return NULL;
-       is_dir = dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode);
+       sb = dentry->d_sb;
        while (1) {
-               struct path ns_root = { .mnt = NULL, .dentry = NULL };
                char *pos;
+               struct inode *inode;
                buf_len <<= 1;
                kfree(buf);
                buf = kmalloc(buf_len, GFP_NOFS);
                if (!buf)
                        break;
+               /* To make sure that pos is '\0' terminated. */
+               buf[buf_len - 1] = '\0';
                /* Get better name for socket. */
-               if (dentry->d_sb->s_magic == SOCKFS_MAGIC) {
-                       struct inode *inode = dentry->d_inode;
-                       struct socket *sock = inode ? SOCKET_I(inode) : NULL;
-                       struct sock *sk = sock ? sock->sk : NULL;
-                       if (sk) {
-                               snprintf(buf, buf_len - 1, "socket:[family=%u:"
-                                        "type=%u:protocol=%u]", sk->sk_family,
-                                        sk->sk_type, sk->sk_protocol);
-                       } else {
-                               snprintf(buf, buf_len - 1, "socket:[unknown]");
-                       }
-                       name = tomoyo_encode(buf);
-                       break;
+               if (sb->s_magic == SOCKFS_MAGIC) {
+                       pos = tomoyo_get_socket_name(path, buf, buf_len - 1);
+                       goto encode;
                }
-               /* For "socket:[\$]" and "pipe:[\$]". */
+               /* For "pipe:[\$]". */
                if (dentry->d_op && dentry->d_op->d_dname) {
                        pos = dentry->d_op->d_dname(dentry, buf, buf_len - 1);
-                       if (IS_ERR(pos))
-                               continue;
-                       name = tomoyo_encode(pos);
-                       break;
-               }
-               /* If we don't have a vfsmount, we can't calculate. */
-               if (!path->mnt)
-                       break;
-               /* go to whatever namespace root we are under */
-               pos = __d_path(path, &ns_root, buf, buf_len);
-               /* Prepend "/proc" prefix if using internal proc vfs mount. */
-               if (!IS_ERR(pos) && (path->mnt->mnt_flags & MNT_INTERNAL) &&
-                   (path->mnt->mnt_sb->s_magic == PROC_SUPER_MAGIC)) {
-                       pos -= 5;
-                       if (pos >= buf)
-                               memcpy(pos, "/proc", 5);
-                       else
-                               pos = ERR_PTR(-ENOMEM);
+                       goto encode;
                }
+               inode = sb->s_root->d_inode;
+               /*
+                * Get local name for filesystems without rename() operation
+                * or dentry without vfsmount.
+                */
+               if (!path->mnt || (inode->i_op && !inode->i_op->rename))
+                       pos = tomoyo_get_local_path(path->dentry, buf,
+                                                   buf_len - 1);
+               /* Get absolute name for the rest. */
+               else
+                       pos = tomoyo_get_absolute_path(path, buf, buf_len - 1);
+encode:
                if (IS_ERR(pos))
                        continue;
                name = tomoyo_encode(pos);
@@ -147,16 +287,6 @@ char *tomoyo_realpath_from_path(struct path *path)
        kfree(buf);
        if (!name)
                tomoyo_warn_oom(__func__);
-       else if (is_dir && *name) {
-               /* Append trailing '/' if dentry is a directory. */
-               char *pos = name + strlen(name) - 1;
-               if (*pos != '/')
-                       /*
-                        * This is OK because tomoyo_encode() reserves space
-                        * for appending "/".
-                        */
-                       *++pos = '/';
-       }
        return name;
 }
 
index e43d555..a49c3bf 100644 (file)
@@ -1,9 +1,7 @@
 /*
- * security/tomoyo/common.c
+ * security/tomoyo/securityfs_if.c
  *
- * Securityfs interface for TOMOYO.
- *
- * Copyright (C) 2005-2010  NTT DATA CORPORATION
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
  */
 
 #include <linux/security.h>
@@ -34,11 +32,11 @@ static int tomoyo_open(struct inode *inode, struct file *file)
  */
 static int tomoyo_release(struct inode *inode, struct file *file)
 {
-       return tomoyo_close_control(file);
+       return tomoyo_close_control(file->private_data);
 }
 
 /**
- * tomoyo_poll - poll() for /proc/ccs/ interface.
+ * tomoyo_poll - poll() for /sys/kernel/security/tomoyo/ interface.
  *
  * @file: Pointer to "struct file".
  * @wait: Pointer to "poll_table".
@@ -63,7 +61,7 @@ static unsigned int tomoyo_poll(struct file *file, poll_table *wait)
 static ssize_t tomoyo_read(struct file *file, char __user *buf, size_t count,
                           loff_t *ppos)
 {
-       return tomoyo_read_control(file, buf, count);
+       return tomoyo_read_control(file->private_data, buf, count);
 }
 
 /**
@@ -79,7 +77,7 @@ static ssize_t tomoyo_read(struct file *file, char __user *buf, size_t count,
 static ssize_t tomoyo_write(struct file *file, const char __user *buf,
                            size_t count, loff_t *ppos)
 {
-       return tomoyo_write_control(file, buf, count);
+       return tomoyo_write_control(file->private_data, buf, count);
 }
 
 /*
@@ -135,14 +133,14 @@ static int __init tomoyo_initerface_init(void)
                            TOMOYO_DOMAINPOLICY);
        tomoyo_create_entry("exception_policy", 0600, tomoyo_dir,
                            TOMOYO_EXCEPTIONPOLICY);
+       tomoyo_create_entry("audit",            0400, tomoyo_dir,
+                           TOMOYO_AUDIT);
        tomoyo_create_entry("self_domain",      0400, tomoyo_dir,
                            TOMOYO_SELFDOMAIN);
-       tomoyo_create_entry(".domain_status",   0600, tomoyo_dir,
-                           TOMOYO_DOMAIN_STATUS);
        tomoyo_create_entry(".process_status",  0600, tomoyo_dir,
                            TOMOYO_PROCESS_STATUS);
-       tomoyo_create_entry("meminfo",          0600, tomoyo_dir,
-                           TOMOYO_MEMINFO);
+       tomoyo_create_entry("stat",             0644, tomoyo_dir,
+                           TOMOYO_STAT);
        tomoyo_create_entry("profile",          0600, tomoyo_dir,
                            TOMOYO_PROFILE);
        tomoyo_create_entry("manager",          0600, tomoyo_dir,
index 95d3f95..f776400 100644 (file)
@@ -1,20 +1,35 @@
 /*
  * security/tomoyo/tomoyo.c
  *
- * LSM hooks for TOMOYO Linux.
- *
- * Copyright (C) 2005-2010  NTT DATA CORPORATION
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
  */
 
 #include <linux/security.h>
 #include "common.h"
 
+/**
+ * tomoyo_cred_alloc_blank - Target for security_cred_alloc_blank().
+ *
+ * @new: Pointer to "struct cred".
+ * @gfp: Memory allocation flags.
+ *
+ * Returns 0.
+ */
 static int tomoyo_cred_alloc_blank(struct cred *new, gfp_t gfp)
 {
        new->security = NULL;
        return 0;
 }
 
+/**
+ * tomoyo_cred_prepare - Target for security_prepare_creds().
+ *
+ * @new: Pointer to "struct cred".
+ * @old: Pointer to "struct cred".
+ * @gfp: Memory allocation flags.
+ *
+ * Returns 0.
+ */
 static int tomoyo_cred_prepare(struct cred *new, const struct cred *old,
                               gfp_t gfp)
 {
@@ -25,11 +40,22 @@ static int tomoyo_cred_prepare(struct cred *new, const struct cred *old,
        return 0;
 }
 
+/**
+ * tomoyo_cred_transfer - Target for security_transfer_creds().
+ *
+ * @new: Pointer to "struct cred".
+ * @old: Pointer to "struct cred".
+ */
 static void tomoyo_cred_transfer(struct cred *new, const struct cred *old)
 {
        tomoyo_cred_prepare(new, old, 0);
 }
 
+/**
+ * tomoyo_cred_free - Target for security_cred_free().
+ *
+ * @cred: Pointer to "struct cred".
+ */
 static void tomoyo_cred_free(struct cred *cred)
 {
        struct tomoyo_domain_info *domain = cred->security;
@@ -37,6 +63,13 @@ static void tomoyo_cred_free(struct cred *cred)
                atomic_dec(&domain->users);
 }
 
+/**
+ * tomoyo_bprm_set_creds - Target for security_bprm_set_creds().
+ *
+ * @bprm: Pointer to "struct linux_binprm".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_bprm_set_creds(struct linux_binprm *bprm)
 {
        int rc;
@@ -51,12 +84,14 @@ static int tomoyo_bprm_set_creds(struct linux_binprm *bprm)
         */
        if (bprm->cred_prepared)
                return 0;
+#ifndef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER
        /*
         * Load policy if /sbin/tomoyo-init exists and /sbin/init is requested
         * for the first time.
         */
        if (!tomoyo_policy_loaded)
                tomoyo_load_policy(bprm->filename);
+#endif
        /*
         * Release reference to "struct tomoyo_domain_info" stored inside
         * "bprm->cred->security". New reference to "struct tomoyo_domain_info"
@@ -73,6 +108,13 @@ static int tomoyo_bprm_set_creds(struct linux_binprm *bprm)
        return 0;
 }
 
+/**
+ * tomoyo_bprm_check_security - Target for security_bprm_check().
+ *
+ * @bprm: Pointer to "struct linux_binprm".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_bprm_check_security(struct linux_binprm *bprm)
 {
        struct tomoyo_domain_info *domain = bprm->cred->security;
@@ -90,20 +132,59 @@ static int tomoyo_bprm_check_security(struct linux_binprm *bprm)
        /*
         * Read permission is checked against interpreters using next domain.
         */
-       return tomoyo_check_open_permission(domain, &bprm->file->f_path, O_RDONLY);
+       return tomoyo_check_open_permission(domain, &bprm->file->f_path,
+                                           O_RDONLY);
+}
+
+/**
+ * tomoyo_inode_getattr - Target for security_inode_getattr().
+ *
+ * @mnt:    Pointer to "struct vfsmount".
+ * @dentry: Pointer to "struct dentry".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
+static int tomoyo_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
+{
+       struct path path = { mnt, dentry };
+       return tomoyo_path_perm(TOMOYO_TYPE_GETATTR, &path, NULL);
 }
 
+/**
+ * tomoyo_path_truncate - Target for security_path_truncate().
+ *
+ * @path: Pointer to "struct path".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_path_truncate(struct path *path)
 {
-       return tomoyo_path_perm(TOMOYO_TYPE_TRUNCATE, path);
+       return tomoyo_path_perm(TOMOYO_TYPE_TRUNCATE, path, NULL);
 }
 
+/**
+ * tomoyo_path_unlink - Target for security_path_unlink().
+ *
+ * @parent: Pointer to "struct path".
+ * @dentry: Pointer to "struct dentry".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_path_unlink(struct path *parent, struct dentry *dentry)
 {
        struct path path = { parent->mnt, dentry };
-       return tomoyo_path_perm(TOMOYO_TYPE_UNLINK, &path);
+       return tomoyo_path_perm(TOMOYO_TYPE_UNLINK, &path, NULL);
 }
 
+/**
+ * tomoyo_path_mkdir - Target for security_path_mkdir().
+ *
+ * @parent: Pointer to "struct path".
+ * @dentry: Pointer to "struct dentry".
+ * @mode:   DAC permission mode.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_path_mkdir(struct path *parent, struct dentry *dentry,
                             int mode)
 {
@@ -112,19 +193,46 @@ static int tomoyo_path_mkdir(struct path *parent, struct dentry *dentry,
                                       mode & S_IALLUGO);
 }
 
+/**
+ * tomoyo_path_rmdir - Target for security_path_rmdir().
+ *
+ * @parent: Pointer to "struct path".
+ * @dentry: Pointer to "struct dentry".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_path_rmdir(struct path *parent, struct dentry *dentry)
 {
        struct path path = { parent->mnt, dentry };
-       return tomoyo_path_perm(TOMOYO_TYPE_RMDIR, &path);
+       return tomoyo_path_perm(TOMOYO_TYPE_RMDIR, &path, NULL);
 }
 
+/**
+ * tomoyo_path_symlink - Target for security_path_symlink().
+ *
+ * @parent:   Pointer to "struct path".
+ * @dentry:   Pointer to "struct dentry".
+ * @old_name: Symlink's content.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_path_symlink(struct path *parent, struct dentry *dentry,
                               const char *old_name)
 {
        struct path path = { parent->mnt, dentry };
-       return tomoyo_path_perm(TOMOYO_TYPE_SYMLINK, &path);
+       return tomoyo_path_perm(TOMOYO_TYPE_SYMLINK, &path, old_name);
 }
 
+/**
+ * tomoyo_path_mknod - Target for security_path_mknod().
+ *
+ * @parent: Pointer to "struct path".
+ * @dentry: Pointer to "struct dentry".
+ * @mode:   DAC permission mode.
+ * @dev:    Device attributes.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_path_mknod(struct path *parent, struct dentry *dentry,
                             int mode, unsigned int dev)
 {
@@ -155,6 +263,15 @@ static int tomoyo_path_mknod(struct path *parent, struct dentry *dentry,
        return tomoyo_path_number_perm(type, &path, perm);
 }
 
+/**
+ * tomoyo_path_link - Target for security_path_link().
+ *
+ * @old_dentry: Pointer to "struct dentry".
+ * @new_dir:    Pointer to "struct path".
+ * @new_dentry: Pointer to "struct dentry".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_path_link(struct dentry *old_dentry, struct path *new_dir,
                            struct dentry *new_dentry)
 {
@@ -163,6 +280,16 @@ static int tomoyo_path_link(struct dentry *old_dentry, struct path *new_dir,
        return tomoyo_path2_perm(TOMOYO_TYPE_LINK, &path1, &path2);
 }
 
+/**
+ * tomoyo_path_rename - Target for security_path_rename().
+ *
+ * @old_parent: Pointer to "struct path".
+ * @old_dentry: Pointer to "struct dentry".
+ * @new_parent: Pointer to "struct path".
+ * @new_dentry: Pointer to "struct dentry".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_path_rename(struct path *old_parent,
                              struct dentry *old_dentry,
                              struct path *new_parent,
@@ -173,14 +300,32 @@ static int tomoyo_path_rename(struct path *old_parent,
        return tomoyo_path2_perm(TOMOYO_TYPE_RENAME, &path1, &path2);
 }
 
+/**
+ * tomoyo_file_fcntl - Target for security_file_fcntl().
+ *
+ * @file: Pointer to "struct file".
+ * @cmd:  Command for fcntl().
+ * @arg:  Argument for @cmd.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_file_fcntl(struct file *file, unsigned int cmd,
                             unsigned long arg)
 {
-       if (cmd == F_SETFL && ((arg ^ file->f_flags) & O_APPEND))
-               return tomoyo_path_perm(TOMOYO_TYPE_REWRITE, &file->f_path);
-       return 0;
+       if (!(cmd == F_SETFL && ((arg ^ file->f_flags) & O_APPEND)))
+               return 0;
+       return tomoyo_check_open_permission(tomoyo_domain(), &file->f_path,
+                                           O_WRONLY | (arg & O_APPEND));
 }
 
+/**
+ * tomoyo_dentry_open - Target for security_dentry_open().
+ *
+ * @f:    Pointer to "struct file".
+ * @cred: Pointer to "struct cred".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_dentry_open(struct file *f, const struct cred *cred)
 {
        int flags = f->f_flags;
@@ -190,12 +335,30 @@ static int tomoyo_dentry_open(struct file *f, const struct cred *cred)
        return tomoyo_check_open_permission(tomoyo_domain(), &f->f_path, flags);
 }
 
+/**
+ * tomoyo_file_ioctl - Target for security_file_ioctl().
+ *
+ * @file: Pointer to "struct file".
+ * @cmd:  Command for ioctl().
+ * @arg:  Argument for @cmd.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_file_ioctl(struct file *file, unsigned int cmd,
                             unsigned long arg)
 {
        return tomoyo_path_number_perm(TOMOYO_TYPE_IOCTL, &file->f_path, cmd);
 }
 
+/**
+ * tomoyo_path_chmod - Target for security_path_chmod().
+ *
+ * @dentry: Pointer to "struct dentry".
+ * @mnt:    Pointer to "struct vfsmount".
+ * @mode:   DAC permission mode.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
                             mode_t mode)
 {
@@ -204,6 +367,15 @@ static int tomoyo_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
                                       mode & S_IALLUGO);
 }
 
+/**
+ * tomoyo_path_chown - Target for security_path_chown().
+ *
+ * @path: Pointer to "struct path".
+ * @uid:  Owner ID.
+ * @gid:  Group ID.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_path_chown(struct path *path, uid_t uid, gid_t gid)
 {
        int error = 0;
@@ -214,23 +386,57 @@ static int tomoyo_path_chown(struct path *path, uid_t uid, gid_t gid)
        return error;
 }
 
+/**
+ * tomoyo_path_chroot - Target for security_path_chroot().
+ *
+ * @path: Pointer to "struct path".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_path_chroot(struct path *path)
 {
-       return tomoyo_path_perm(TOMOYO_TYPE_CHROOT, path);
+       return tomoyo_path_perm(TOMOYO_TYPE_CHROOT, path, NULL);
 }
 
+/**
+ * tomoyo_sb_mount - Target for security_sb_mount().
+ *
+ * @dev_name: Name of device file. May be NULL.
+ * @path:     Pointer to "struct path".
+ * @type:     Name of filesystem type. May be NULL.
+ * @flags:    Mount options.
+ * @data:     Optional data. May be NULL.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_sb_mount(char *dev_name, struct path *path,
                           char *type, unsigned long flags, void *data)
 {
        return tomoyo_mount_permission(dev_name, path, type, flags, data);
 }
 
+/**
+ * tomoyo_sb_umount - Target for security_sb_umount().
+ *
+ * @mnt:   Pointer to "struct vfsmount".
+ * @flags: Unmount options.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_sb_umount(struct vfsmount *mnt, int flags)
 {
        struct path path = { mnt, mnt->mnt_root };
-       return tomoyo_path_perm(TOMOYO_TYPE_UMOUNT, &path);
+       return tomoyo_path_perm(TOMOYO_TYPE_UMOUNT, &path, NULL);
 }
 
+/**
+ * tomoyo_sb_pivotroot - Target for security_sb_pivotroot().
+ *
+ * @old_path: Pointer to "struct path".
+ * @new_path: Pointer to "struct path".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
 static int tomoyo_sb_pivotroot(struct path *old_path, struct path *new_path)
 {
        return tomoyo_path2_perm(TOMOYO_TYPE_PIVOT_ROOT, new_path, old_path);
@@ -258,6 +464,7 @@ static struct security_operations tomoyo_security_ops = {
        .path_mknod          = tomoyo_path_mknod,
        .path_link           = tomoyo_path_link,
        .path_rename         = tomoyo_path_rename,
+       .inode_getattr       = tomoyo_inode_getattr,
        .file_ioctl          = tomoyo_file_ioctl,
        .path_chmod          = tomoyo_path_chmod,
        .path_chown          = tomoyo_path_chown,
@@ -270,6 +477,11 @@ static struct security_operations tomoyo_security_ops = {
 /* Lock for GC. */
 struct srcu_struct tomoyo_ss;
 
+/**
+ * tomoyo_init - Register TOMOYO Linux as a LSM module.
+ *
+ * Returns 0.
+ */
 static int __init tomoyo_init(void)
 {
        struct cred *cred = (struct cred *) current_cred();
index 6d53932..c36bd11 100644 (file)
@@ -1,9 +1,7 @@
 /*
  * security/tomoyo/util.c
  *
- * Utility functions for TOMOYO.
- *
- * Copyright (C) 2005-2010  NTT DATA CORPORATION
+ * Copyright (C) 2005-2011  NTT DATA CORPORATION
  */
 
 #include <linux/slab.h>
@@ -15,18 +13,130 @@ DEFINE_MUTEX(tomoyo_policy_lock);
 /* Has /sbin/init started? */
 bool tomoyo_policy_loaded;
 
+/*
+ * Mapping table from "enum tomoyo_mac_index" to
+ * "enum tomoyo_mac_category_index".
+ */
+const u8 tomoyo_index2category[TOMOYO_MAX_MAC_INDEX] = {
+       /* CONFIG::file group */
+       [TOMOYO_MAC_FILE_EXECUTE]    = TOMOYO_MAC_CATEGORY_FILE,
+       [TOMOYO_MAC_FILE_OPEN]       = TOMOYO_MAC_CATEGORY_FILE,
+       [TOMOYO_MAC_FILE_CREATE]     = TOMOYO_MAC_CATEGORY_FILE,
+       [TOMOYO_MAC_FILE_UNLINK]     = TOMOYO_MAC_CATEGORY_FILE,
+       [TOMOYO_MAC_FILE_GETATTR]    = TOMOYO_MAC_CATEGORY_FILE,
+       [TOMOYO_MAC_FILE_MKDIR]      = TOMOYO_MAC_CATEGORY_FILE,
+       [TOMOYO_MAC_FILE_RMDIR]      = TOMOYO_MAC_CATEGORY_FILE,
+       [TOMOYO_MAC_FILE_MKFIFO]     = TOMOYO_MAC_CATEGORY_FILE,
+       [TOMOYO_MAC_FILE_MKSOCK]     = TOMOYO_MAC_CATEGORY_FILE,
+       [TOMOYO_MAC_FILE_TRUNCATE]   = TOMOYO_MAC_CATEGORY_FILE,
+       [TOMOYO_MAC_FILE_SYMLINK]    = TOMOYO_MAC_CATEGORY_FILE,
+       [TOMOYO_MAC_FILE_MKBLOCK]    = TOMOYO_MAC_CATEGORY_FILE,
+       [TOMOYO_MAC_FILE_MKCHAR]     = TOMOYO_MAC_CATEGORY_FILE,
+       [TOMOYO_MAC_FILE_LINK]       = TOMOYO_MAC_CATEGORY_FILE,
+       [TOMOYO_MAC_FILE_RENAME]     = TOMOYO_MAC_CATEGORY_FILE,
+       [TOMOYO_MAC_FILE_CHMOD]      = TOMOYO_MAC_CATEGORY_FILE,
+       [TOMOYO_MAC_FILE_CHOWN]      = TOMOYO_MAC_CATEGORY_FILE,
+       [TOMOYO_MAC_FILE_CHGRP]      = TOMOYO_MAC_CATEGORY_FILE,
+       [TOMOYO_MAC_FILE_IOCTL]      = TOMOYO_MAC_CATEGORY_FILE,
+       [TOMOYO_MAC_FILE_CHROOT]     = TOMOYO_MAC_CATEGORY_FILE,
+       [TOMOYO_MAC_FILE_MOUNT]      = TOMOYO_MAC_CATEGORY_FILE,
+       [TOMOYO_MAC_FILE_UMOUNT]     = TOMOYO_MAC_CATEGORY_FILE,
+       [TOMOYO_MAC_FILE_PIVOT_ROOT] = TOMOYO_MAC_CATEGORY_FILE,
+};
+
+/**
+ * tomoyo_convert_time - Convert time_t to YYYY/MM/DD hh/mm/ss.
+ *
+ * @time:  Seconds since 1970/01/01 00:00:00.
+ * @stamp: Pointer to "struct tomoyo_time".
+ *
+ * Returns nothing.
+ *
+ * This function does not handle Y2038 problem.
+ */
+void tomoyo_convert_time(time_t time, struct tomoyo_time *stamp)
+{
+       static const u16 tomoyo_eom[2][12] = {
+               { 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
+               { 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
+       };
+       u16 y;
+       u8 m;
+       bool r;
+       stamp->sec = time % 60;
+       time /= 60;
+       stamp->min = time % 60;
+       time /= 60;
+       stamp->hour = time % 24;
+       time /= 24;
+       for (y = 1970; ; y++) {
+               const unsigned short days = (y & 3) ? 365 : 366;
+               if (time < days)
+                       break;
+               time -= days;
+       }
+       r = (y & 3) == 0;
+       for (m = 0; m < 11 && time >= tomoyo_eom[r][m]; m++)
+               ;
+       if (m)
+               time -= tomoyo_eom[r][m - 1];
+       stamp->year = y;
+       stamp->month = ++m;
+       stamp->day = ++time;
+}
+
+/**
+ * tomoyo_permstr - Find permission keywords.
+ *
+ * @string: String representation for permissions in foo/bar/buz format.
+ * @keyword: Keyword to find from @string.
+ *
+ * Returns true if @keyword was found in @string, false otherwise.
+ *
+ * This function assumes that strncmp(w1, w2, strlen(w1)) != 0 if w1 != w2.
+ */
+bool tomoyo_permstr(const char *string, const char *keyword)
+{
+       const char *cp = strstr(string, keyword);
+       if (cp)
+               return cp == string || *(cp - 1) == '/';
+       return false;
+}
+
+/**
+ * tomoyo_read_token - Read a word from a line.
+ *
+ * @param: Pointer to "struct tomoyo_acl_param".
+ *
+ * Returns a word on success, "" otherwise.
+ *
+ * To allow the caller to skip NULL check, this function returns "" rather than
+ * NULL if there are no more words to read.
+ */
+char *tomoyo_read_token(struct tomoyo_acl_param *param)
+{
+       char *pos = param->data;
+       char *del = strchr(pos, ' ');
+       if (del)
+               *del++ = '\0';
+       else
+               del = pos + strlen(pos);
+       param->data = del;
+       return pos;
+}
+
 /**
  * tomoyo_parse_ulong - Parse an "unsigned long" value.
  *
  * @result: Pointer to "unsigned long".
  * @str:    Pointer to string to parse.
  *
- * Returns value type on success, 0 otherwise.
+ * Returns one of values in "enum tomoyo_value_type".
  *
  * The @src is updated to point the first character after the value
  * on success.
  */
-static u8 tomoyo_parse_ulong(unsigned long *result, char **str)
+u8 tomoyo_parse_ulong(unsigned long *result, char **str)
 {
        const char *cp = *str;
        char *ep;
@@ -43,7 +153,7 @@ static u8 tomoyo_parse_ulong(unsigned long *result, char **str)
        }
        *result = simple_strtoul(cp, &ep, base);
        if (cp == ep)
-               return 0;
+               return TOMOYO_VALUE_TYPE_INVALID;
        *str = ep;
        switch (base) {
        case 16:
@@ -81,63 +191,65 @@ void tomoyo_print_ulong(char *buffer, const int buffer_len,
 /**
  * tomoyo_parse_name_union - Parse a tomoyo_name_union.
  *
- * @filename: Name or name group.
- * @ptr:      Pointer to "struct tomoyo_name_union".
+ * @param: Pointer to "struct tomoyo_acl_param".
+ * @ptr:   Pointer to "struct tomoyo_name_union".
  *
  * Returns true on success, false otherwise.
  */
-bool tomoyo_parse_name_union(const char *filename,
+bool tomoyo_parse_name_union(struct tomoyo_acl_param *param,
                             struct tomoyo_name_union *ptr)
 {
-       if (!tomoyo_correct_word(filename))
-               return false;
-       if (filename[0] == '@') {
-               ptr->group = tomoyo_get_group(filename + 1, TOMOYO_PATH_GROUP);
-               ptr->is_group = true;
+       char *filename;
+       if (param->data[0] == '@') {
+               param->data++;
+               ptr->group = tomoyo_get_group(param, TOMOYO_PATH_GROUP);
                return ptr->group != NULL;
        }
+       filename = tomoyo_read_token(param);
+       if (!tomoyo_correct_word(filename))
+               return false;
        ptr->filename = tomoyo_get_name(filename);
-       ptr->is_group = false;
        return ptr->filename != NULL;
 }
 
 /**
  * tomoyo_parse_number_union - Parse a tomoyo_number_union.
  *
- * @data: Number or number range or number group.
- * @ptr:  Pointer to "struct tomoyo_number_union".
+ * @param: Pointer to "struct tomoyo_acl_param".
+ * @ptr:   Pointer to "struct tomoyo_number_union".
  *
  * Returns true on success, false otherwise.
  */
-bool tomoyo_parse_number_union(char *data, struct tomoyo_number_union *num)
+bool tomoyo_parse_number_union(struct tomoyo_acl_param *param,
+                              struct tomoyo_number_union *ptr)
 {
+       char *data;
        u8 type;
        unsigned long v;
-       memset(num, 0, sizeof(*num));
-       if (data[0] == '@') {
-               if (!tomoyo_correct_word(data))
-                       return false;
-               num->group = tomoyo_get_group(data + 1, TOMOYO_NUMBER_GROUP);
-               num->is_group = true;
-               return num->group != NULL;
+       memset(ptr, 0, sizeof(*ptr));
+       if (param->data[0] == '@') {
+               param->data++;
+               ptr->group = tomoyo_get_group(param, TOMOYO_NUMBER_GROUP);
+               return ptr->group != NULL;
        }
+       data = tomoyo_read_token(param);
        type = tomoyo_parse_ulong(&v, &data);
-       if (!type)
+       if (type == TOMOYO_VALUE_TYPE_INVALID)
                return false;
-       num->values[0] = v;
-       num->min_type = type;
+       ptr->values[0] = v;
+       ptr->value_type[0] = type;
        if (!*data) {
-               num->values[1] = v;
-               num->max_type = type;
+               ptr->values[1] = v;
+               ptr->value_type[1] = type;
                return true;
        }
        if (*data++ != '-')
                return false;
        type = tomoyo_parse_ulong(&v, &data);
-       if (!type || *data)
+       if (type == TOMOYO_VALUE_TYPE_INVALID || *data || ptr->values[0] > v)
                return false;
-       num->values[1] = v;
-       num->max_type = type;
+       ptr->values[1] = v;
+       ptr->value_type[1] = type;
        return true;
 }
 
@@ -185,6 +297,30 @@ static inline u8 tomoyo_make_byte(const u8 c1, const u8 c2, const u8 c3)
 }
 
 /**
+ * tomoyo_valid - Check whether the character is a valid char.
+ *
+ * @c: The character to check.
+ *
+ * Returns true if @c is a valid character, false otherwise.
+ */
+static inline bool tomoyo_valid(const unsigned char c)
+{
+       return c > ' ' && c < 127;
+}
+
+/**
+ * tomoyo_invalid - Check whether the character is an invalid char.
+ *
+ * @c: The character to check.
+ *
+ * Returns true if @c is an invalid character, false otherwise.
+ */
+static inline bool tomoyo_invalid(const unsigned char c)
+{
+       return c && (c <= ' ' || c >= 127);
+}
+
+/**
  * tomoyo_str_starts - Check whether the given string starts with the given keyword.
  *
  * @src:  Pointer to pointer to the string.
@@ -238,36 +374,9 @@ void tomoyo_normalize_line(unsigned char *buffer)
 }
 
 /**
- * tomoyo_tokenize - Tokenize string.
- *
- * @buffer: The line to tokenize.
- * @w:      Pointer to "char *".
- * @size:   Sizeof @w .
- *
- * Returns true on success, false otherwise.
- */
-bool tomoyo_tokenize(char *buffer, char *w[], size_t size)
-{
-       int count = size / sizeof(char *);
-       int i;
-       for (i = 0; i < count; i++)
-               w[i] = "";
-       for (i = 0; i < count; i++) {
-               char *cp = strchr(buffer, ' ');
-               if (cp)
-                       *cp = '\0';
-               w[i] = buffer;
-               if (!cp)
-                       break;
-               buffer = cp + 1;
-       }
-       return i < count || !*buffer;
-}
-
-/**
  * tomoyo_correct_word2 - Validate a string.
  *
- * @string: The string to check. May be non-'\0'-terminated.
+ * @string: The string to check. May be non-'\0'-terminated.
  * @len:    Length of @string.
  *
  * Check whether the given string follows the naming rules.
@@ -377,26 +486,21 @@ bool tomoyo_correct_path(const char *filename)
  */
 bool tomoyo_correct_domain(const unsigned char *domainname)
 {
-       if (!domainname || strncmp(domainname, TOMOYO_ROOT_NAME,
-                                  TOMOYO_ROOT_NAME_LEN))
-               goto out;
-       domainname += TOMOYO_ROOT_NAME_LEN;
-       if (!*domainname)
+       if (!domainname || !tomoyo_domain_def(domainname))
+               return false;
+       domainname = strchr(domainname, ' ');
+       if (!domainname++)
                return true;
-       if (*domainname++ != ' ')
-               goto out;
        while (1) {
                const unsigned char *cp = strchr(domainname, ' ');
                if (!cp)
                        break;
                if (*domainname != '/' ||
                    !tomoyo_correct_word2(domainname, cp - domainname))
-                       goto out;
+                       return false;
                domainname = cp + 1;
        }
        return tomoyo_correct_path(domainname);
- out:
-       return false;
 }
 
 /**
@@ -408,7 +512,19 @@ bool tomoyo_correct_domain(const unsigned char *domainname)
  */
 bool tomoyo_domain_def(const unsigned char *buffer)
 {
-       return !strncmp(buffer, TOMOYO_ROOT_NAME, TOMOYO_ROOT_NAME_LEN);
+       const unsigned char *cp;
+       int len;
+       if (*buffer != '<')
+               return false;
+       cp = strchr(buffer, ' ');
+       if (!cp)
+               len = strlen(buffer);
+       else
+               len = cp - buffer;
+       if (buffer[len - 1] != '>' ||
+           !tomoyo_correct_word2(buffer + 1, len - 2))
+               return false;
+       return true;
 }
 
 /**
@@ -794,22 +910,24 @@ const char *tomoyo_get_exe(void)
 /**
  * tomoyo_get_mode - Get MAC mode.
  *
+ * @ns:      Pointer to "struct tomoyo_policy_namespace".
  * @profile: Profile number.
  * @index:   Index number of functionality.
  *
  * Returns mode.
  */
-int tomoyo_get_mode(const u8 profile, const u8 index)
+int tomoyo_get_mode(const struct tomoyo_policy_namespace *ns, const u8 profile,
+                   const u8 index)
 {
        u8 mode;
        const u8 category = TOMOYO_MAC_CATEGORY_FILE;
        if (!tomoyo_policy_loaded)
                return TOMOYO_CONFIG_DISABLED;
-       mode = tomoyo_profile(profile)->config[index];
+       mode = tomoyo_profile(ns, profile)->config[index];
        if (mode == TOMOYO_CONFIG_USE_DEFAULT)
-               mode = tomoyo_profile(profile)->config[category];
+               mode = tomoyo_profile(ns, profile)->config[category];
        if (mode == TOMOYO_CONFIG_USE_DEFAULT)
-               mode = tomoyo_profile(profile)->default_config;
+               mode = tomoyo_profile(ns, profile)->default_config;
        return mode & 3;
 }
 
@@ -833,65 +951,11 @@ int tomoyo_init_request_info(struct tomoyo_request_info *r,
        profile = domain->profile;
        r->profile = profile;
        r->type = index;
-       r->mode = tomoyo_get_mode(profile, index);
+       r->mode = tomoyo_get_mode(domain->ns, profile, index);
        return r->mode;
 }
 
 /**
- * tomoyo_last_word - Get last component of a line.
- *
- * @line: A line.
- *
- * Returns the last word of a line.
- */
-const char *tomoyo_last_word(const char *name)
-{
-       const char *cp = strrchr(name, ' ');
-       if (cp)
-               return cp + 1;
-       return name;
-}
-
-/**
- * tomoyo_warn_log - Print warning or error message on console.
- *
- * @r:   Pointer to "struct tomoyo_request_info".
- * @fmt: The printf()'s format string, followed by parameters.
- */
-void tomoyo_warn_log(struct tomoyo_request_info *r, const char *fmt, ...)
-{
-       va_list args;
-       char *buffer;
-       const struct tomoyo_domain_info * const domain = r->domain;
-       const struct tomoyo_profile *profile = tomoyo_profile(domain->profile);
-       switch (r->mode) {
-       case TOMOYO_CONFIG_ENFORCING:
-               if (!profile->enforcing->enforcing_verbose)
-                       return;
-               break;
-       case TOMOYO_CONFIG_PERMISSIVE:
-               if (!profile->permissive->permissive_verbose)
-                       return;
-               break;
-       case TOMOYO_CONFIG_LEARNING:
-               if (!profile->learning->learning_verbose)
-                       return;
-               break;
-       }
-       buffer = kmalloc(4096, GFP_NOFS);
-       if (!buffer)
-               return;
-       va_start(args, fmt);
-       vsnprintf(buffer, 4095, fmt, args);
-       va_end(args);
-       buffer[4095] = '\0';
-       printk(KERN_WARNING "%s: Access %s denied for %s\n",
-              r->mode == TOMOYO_CONFIG_ENFORCING ? "ERROR" : "WARNING", buffer,
-              tomoyo_last_word(domain->domainname->name));
-       kfree(buffer);
-}
-
-/**
  * tomoyo_domain_quota_is_ok - Check for domain's quota.
  *
  * @r: Pointer to "struct tomoyo_request_info".
@@ -911,52 +975,43 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r)
        if (!domain)
                return true;
        list_for_each_entry_rcu(ptr, &domain->acl_info_list, list) {
+               u16 perm;
+               u8 i;
                if (ptr->is_deleted)
                        continue;
                switch (ptr->type) {
-                       u16 perm;
-                       u8 i;
                case TOMOYO_TYPE_PATH_ACL:
                        perm = container_of(ptr, struct tomoyo_path_acl, head)
                                ->perm;
-                       for (i = 0; i < TOMOYO_MAX_PATH_OPERATION; i++)
-                               if (perm & (1 << i))
-                                       count++;
-                       if (perm & (1 << TOMOYO_TYPE_READ_WRITE))
-                               count -= 2;
                        break;
                case TOMOYO_TYPE_PATH2_ACL:
                        perm = container_of(ptr, struct tomoyo_path2_acl, head)
                                ->perm;
-                       for (i = 0; i < TOMOYO_MAX_PATH2_OPERATION; i++)
-                               if (perm & (1 << i))
-                                       count++;
                        break;
                case TOMOYO_TYPE_PATH_NUMBER_ACL:
                        perm = container_of(ptr, struct tomoyo_path_number_acl,
                                            head)->perm;
-                       for (i = 0; i < TOMOYO_MAX_PATH_NUMBER_OPERATION; i++)
-                               if (perm & (1 << i))
-                                       count++;
                        break;
                case TOMOYO_TYPE_MKDEV_ACL:
                        perm = container_of(ptr, struct tomoyo_mkdev_acl,
                                            head)->perm;
-                       for (i = 0; i < TOMOYO_MAX_MKDEV_OPERATION; i++)
-                               if (perm & (1 << i))
-                                       count++;
                        break;
                default:
-                       count++;
+                       perm = 1;
                }
+               for (i = 0; i < 16; i++)
+                       if (perm & (1 << i))
+                               count++;
        }
-       if (count < tomoyo_profile(domain->profile)->learning->
-           learning_max_entry)
+       if (count < tomoyo_profile(domain->ns, domain->profile)->
+           pref[TOMOYO_PREF_MAX_LEARNING_ENTRY])
                return true;
-       if (!domain->quota_warned) {
-               domain->quota_warned = true;
-               printk(KERN_WARNING "TOMOYO-WARNING: "
-                      "Domain '%s' has so many ACLs to hold. "
+       if (!domain->flags[TOMOYO_DIF_QUOTA_WARNED]) {
+               domain->flags[TOMOYO_DIF_QUOTA_WARNED] = true;
+               /* r->granted = false; */
+               tomoyo_write_log(r, "%s", tomoyo_dif[TOMOYO_DIF_QUOTA_WARNED]);
+               printk(KERN_WARNING "WARNING: "
+                      "Domain '%s' has too many ACLs to hold. "
                       "Stopped learning mode.\n", domain->domainname->name);
        }
        return false;
index f134130..86d0caf 100644 (file)
@@ -128,7 +128,8 @@ void snd_pcm_playback_silence(struct snd_pcm_substream *substream, snd_pcm_ufram
        }
 }
 
-static void pcm_debug_name(struct snd_pcm_substream *substream,
+#ifdef CONFIG_SND_DEBUG
+void snd_pcm_debug_name(struct snd_pcm_substream *substream,
                           char *name, size_t len)
 {
        snprintf(name, len, "pcmC%dD%d%c:%d",
@@ -137,6 +138,8 @@ static void pcm_debug_name(struct snd_pcm_substream *substream,
                 substream->stream ? 'c' : 'p',
                 substream->number);
 }
+EXPORT_SYMBOL(snd_pcm_debug_name);
+#endif
 
 #define XRUN_DEBUG_BASIC       (1<<0)
 #define XRUN_DEBUG_STACK       (1<<1)  /* dump also stack */
@@ -168,7 +171,7 @@ static void xrun(struct snd_pcm_substream *substream)
        snd_pcm_stop(substream, SNDRV_PCM_STATE_XRUN);
        if (xrun_debug(substream, XRUN_DEBUG_BASIC)) {
                char name[16];
-               pcm_debug_name(substream, name, sizeof(name));
+               snd_pcm_debug_name(substream, name, sizeof(name));
                snd_printd(KERN_DEBUG "XRUN: %s\n", name);
                dump_stack_on_xrun(substream);
        }
@@ -243,7 +246,7 @@ static void xrun_log_show(struct snd_pcm_substream *substream)
                return;
        if (xrun_debug(substream, XRUN_DEBUG_LOGONCE) && log->hit)
                return;
-       pcm_debug_name(substream, name, sizeof(name));
+       snd_pcm_debug_name(substream, name, sizeof(name));
        for (cnt = 0, idx = log->idx; cnt < XRUN_LOG_CNT; cnt++) {
                entry = &log->entries[idx];
                if (entry->period_size == 0)
@@ -319,7 +322,7 @@ static int snd_pcm_update_hw_ptr0(struct snd_pcm_substream *substream,
        if (pos >= runtime->buffer_size) {
                if (printk_ratelimit()) {
                        char name[16];
-                       pcm_debug_name(substream, name, sizeof(name));
+                       snd_pcm_debug_name(substream, name, sizeof(name));
                        xrun_log_show(substream);
                        snd_printd(KERN_ERR  "BUG: %s, pos = %ld, "
                                   "buffer size = %ld, period size = %ld\n",
@@ -364,7 +367,7 @@ static int snd_pcm_update_hw_ptr0(struct snd_pcm_substream *substream,
        if (xrun_debug(substream, in_interrupt ?
                        XRUN_DEBUG_PERIODUPDATE : XRUN_DEBUG_HWPTRUPDATE)) {
                char name[16];
-               pcm_debug_name(substream, name, sizeof(name));
+               snd_pcm_debug_name(substream, name, sizeof(name));
                snd_printd("%s_update: %s: pos=%u/%u/%u, "
                           "hwptr=%ld/%ld/%ld/%ld\n",
                           in_interrupt ? "period" : "hwptr",
index 3773e24..a168ba3 100644 (file)
@@ -249,7 +249,7 @@ struct snd_msnd {
 
        /* State variables */
        enum { msndClassic, msndPinnacle } type;
-       mode_t mode;
+       fmode_t mode;
        unsigned long flags;
 #define F_RESETTING                    0
 #define F_HAVEDIGITAL                  1
index 4d2a6ae..8a197fd 100644 (file)
@@ -458,7 +458,7 @@ static int ad1848_set_recmask(ad1848_info * devc, int mask)
        return mask;
 }
 
-static void change_bits(ad1848_info * devc, unsigned char *regval,
+static void oss_change_bits(ad1848_info *devc, unsigned char *regval,
                        unsigned char *muteval, int dev, int chn, int newval)
 {
        unsigned char mask;
@@ -516,10 +516,10 @@ static void ad1848_mixer_set_channel(ad1848_info *devc, int dev, int value, int
 
        if (muteregoffs != regoffs) {
                muteval = ad_read(devc, muteregoffs);
-               change_bits(devc, &val, &muteval, dev, channel, value);
+               oss_change_bits(devc, &val, &muteval, dev, channel, value);
        }
        else
-               change_bits(devc, &val, &val, dev, channel, value);
+               oss_change_bits(devc, &val, &val, dev, channel, value);
 
        spin_lock_irqsave(&devc->lock,flags);
        ad_write(devc, regoffs, val);
index 2039d31..f8f3b7a 100644 (file)
@@ -232,7 +232,7 @@ static int detect_mixer(sb_devc * devc)
        return 1;
 }
 
-static void change_bits(sb_devc * devc, unsigned char *regval, int dev, int chn, int newval)
+static void oss_change_bits(sb_devc *devc, unsigned char *regval, int dev, int chn, int newval)
 {
        unsigned char mask;
        int shift;
@@ -284,7 +284,7 @@ int sb_common_mixer_set(sb_devc * devc, int dev, int left, int right)
                return -EINVAL;
 
        val = sb_getmixer(devc, regoffs);
-       change_bits(devc, &val, dev, LEFT_CHN, left);
+       oss_change_bits(devc, &val, dev, LEFT_CHN, left);
 
        if ((*devc->iomap)[dev][RIGHT_CHN].regno != regoffs)    /*
                                                                 * Change register
@@ -304,7 +304,7 @@ int sb_common_mixer_set(sb_devc * devc, int dev, int left, int right)
                                                         * Read the new one
                                                         */
        }
-       change_bits(devc, &val, dev, RIGHT_CHN, right);
+       oss_change_bits(devc, &val, dev, RIGHT_CHN, right);
 
        sb_setmixer(devc, regoffs, val);
 
index b941d25..eae62eb 100644 (file)
 #include <sound/tlv.h>
 #include <sound/hwdep.h>
 
-
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("AudioScience inc. <support@audioscience.com>");
 MODULE_DESCRIPTION("AudioScience ALSA ASI5000 ASI6000 ASI87xx ASI89xx");
 
-#if defined CONFIG_SND_DEBUG
-/* copied from pcm_lib.c, hope later patch will make that version public
-and this copy can be removed */
-static inline void
-snd_pcm_debug_name(struct snd_pcm_substream *substream, char *buf, size_t size)
-{
-       snprintf(buf, size, "pcmC%dD%d%c:%d",
-                substream->pcm->card->number,
-                substream->pcm->device,
-                substream->stream ? 'c' : 'p',
-                substream->number);
-}
-#else
-static inline void
-snd_pcm_debug_name(struct snd_pcm_substream *substream, char *buf, size_t size)
-{
-       *buf = 0;
-}
-#endif
-
 #if defined CONFIG_SND_DEBUG_VERBOSE
 /**
  * snd_printddd - very verbose debug printk
index 65fcf47..9683f84 100644 (file)
@@ -107,7 +107,6 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        union hpi_response_buffer_v1 *hr;
        u16 res_max_size;
        u32 uncopied_bytes;
-       struct hpi_adapter *pa = NULL;
        int err = 0;
 
        if (cmd != HPI_IOCTL_LINUX)
@@ -182,8 +181,9 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                /* -1=no data 0=read from user mem, 1=write to user mem */
                int wrflag = -1;
                u32 adapter = hm->h.adapter_index;
+               struct hpi_adapter *pa = &adapters[adapter];
 
-               if ((adapter > HPI_MAX_ADAPTERS) || (!pa->type)) {
+               if ((adapter >= HPI_MAX_ADAPTERS) || (!pa->type)) {
                        hpi_init_response(&hr->r0, HPI_OBJ_ADAPTER,
                                HPI_ADAPTER_OPEN,
                                HPI_ERROR_BAD_ADAPTER_NUMBER);
@@ -197,9 +197,7 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                        goto out;
                }
 
-               pa = &adapters[adapter];
-
-               if (mutex_lock_interruptible(&adapters[adapter].mutex)) {
+               if (mutex_lock_interruptible(&pa->mutex)) {
                        err = -EINTR;
                        goto out;
                }
@@ -235,8 +233,7 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                                                        "stream buffer size %d\n",
                                                        size);
 
-                                               mutex_unlock(&adapters
-                                                       [adapter].mutex);
+                                               mutex_unlock(&pa->mutex);
                                                err = -EINVAL;
                                                goto out;
                                        }
@@ -277,7 +274,7 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                                        uncopied_bytes, size);
                }
 
-               mutex_unlock(&adapters[adapter].mutex);
+               mutex_unlock(&pa->mutex);
        }
 
        /* on return response size must be set */
index 7489b46..bb7e102 100644 (file)
@@ -243,6 +243,7 @@ config SND_HDA_GENERIC
 
 config SND_HDA_POWER_SAVE
        bool "Aggressive power-saving on HD-audio"
+       depends on PM
        help
          Say Y here to enable more aggressive power-saving mode on
          HD-audio driver.  The power-saving timeout can be configured
index 9c27a3a..3e7850c 100644 (file)
@@ -91,8 +91,10 @@ EXPORT_SYMBOL_HDA(snd_hda_delete_codec_preset);
 #ifdef CONFIG_SND_HDA_POWER_SAVE
 static void hda_power_work(struct work_struct *work);
 static void hda_keep_power_on(struct hda_codec *codec);
+#define hda_codec_is_power_on(codec)   ((codec)->power_on)
 #else
 static inline void hda_keep_power_on(struct hda_codec *codec) {}
+#define hda_codec_is_power_on(codec)   1
 #endif
 
 /**
@@ -1101,7 +1103,7 @@ void snd_hda_shutup_pins(struct hda_codec *codec)
 }
 EXPORT_SYMBOL_HDA(snd_hda_shutup_pins);
 
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 /* Restore the pin controls cleared previously via snd_hda_shutup_pins() */
 static void restore_shutup_pins(struct hda_codec *codec)
 {
@@ -1499,7 +1501,7 @@ static void purify_inactive_streams(struct hda_codec *codec)
        }
 }
 
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 /* clean up all streams; called from suspend */
 static void hda_cleanup_all_streams(struct hda_codec *codec)
 {
@@ -1838,7 +1840,7 @@ int snd_hda_codec_amp_stereo(struct hda_codec *codec, hda_nid_t nid,
 }
 EXPORT_SYMBOL_HDA(snd_hda_codec_amp_stereo);
 
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 /**
  * snd_hda_codec_resume_amp - Resume all AMP commands from the cache
  * @codec: HD-audio codec
@@ -1868,7 +1870,7 @@ void snd_hda_codec_resume_amp(struct hda_codec *codec)
        }
 }
 EXPORT_SYMBOL_HDA(snd_hda_codec_resume_amp);
-#endif /* SND_HDA_NEEDS_RESUME */
+#endif /* CONFIG_PM */
 
 static u32 get_amp_max_value(struct hda_codec *codec, hda_nid_t nid, int dir,
                             unsigned int ofs)
@@ -3082,7 +3084,7 @@ int snd_hda_create_spdif_in_ctls(struct hda_codec *codec, hda_nid_t nid)
 }
 EXPORT_SYMBOL_HDA(snd_hda_create_spdif_in_ctls);
 
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 /*
  * command cache
  */
@@ -3199,53 +3201,32 @@ void snd_hda_sequence_write_cache(struct hda_codec *codec,
                                          seq->param);
 }
 EXPORT_SYMBOL_HDA(snd_hda_sequence_write_cache);
-#endif /* SND_HDA_NEEDS_RESUME */
+#endif /* CONFIG_PM */
 
-/*
- * set power state of the codec
- */
-static void hda_set_power_state(struct hda_codec *codec, hda_nid_t fg,
-                               unsigned int power_state)
+void snd_hda_codec_set_power_to_all(struct hda_codec *codec, hda_nid_t fg,
+                                   unsigned int power_state,
+                                   bool eapd_workaround)
 {
-       hda_nid_t nid;
+       hda_nid_t nid = codec->start_nid;
        int i;
 
-       /* this delay seems necessary to avoid click noise at power-down */
-       if (power_state == AC_PWRST_D3)
-               msleep(100);
-       snd_hda_codec_read(codec, fg, 0, AC_VERB_SET_POWER_STATE,
-                           power_state);
-       /* partial workaround for "azx_get_response timeout" */
-       if (power_state == AC_PWRST_D0 &&
-           (codec->vendor_id & 0xffff0000) == 0x14f10000)
-               msleep(10);
-
-       nid = codec->start_nid;
        for (i = 0; i < codec->num_nodes; i++, nid++) {
                unsigned int wcaps = get_wcaps(codec, nid);
-               if (wcaps & AC_WCAP_POWER) {
-                       unsigned int wid_type = get_wcaps_type(wcaps);
-                       if (power_state == AC_PWRST_D3 &&
-                           wid_type == AC_WID_PIN) {
-                               unsigned int pincap;
-                               /*
-                                * don't power down the widget if it controls
-                                * eapd and EAPD_BTLENABLE is set.
-                                */
-                               pincap = snd_hda_query_pin_caps(codec, nid);
-                               if (pincap & AC_PINCAP_EAPD) {
-                                       int eapd = snd_hda_codec_read(codec,
-                                               nid, 0,
+               if (!(wcaps & AC_WCAP_POWER))
+                       continue;
+               /* don't power down the widget if it controls eapd and
+                * EAPD_BTLENABLE is set.
+                */
+               if (eapd_workaround && power_state == AC_PWRST_D3 &&
+                   get_wcaps_type(wcaps) == AC_WID_PIN &&
+                   (snd_hda_query_pin_caps(codec, nid) & AC_PINCAP_EAPD)) {
+                       int eapd = snd_hda_codec_read(codec, nid, 0,
                                                AC_VERB_GET_EAPD_BTLENABLE, 0);
-                                       eapd &= 0x02;
-                                       if (eapd)
-                                               continue;
-                               }
-                       }
-                       snd_hda_codec_write(codec, nid, 0,
-                                           AC_VERB_SET_POWER_STATE,
-                                           power_state);
+                       if (eapd & 0x02)
+                               continue;
                }
+               snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_POWER_STATE,
+                                   power_state);
        }
 
        if (power_state == AC_PWRST_D0) {
@@ -3262,6 +3243,26 @@ static void hda_set_power_state(struct hda_codec *codec, hda_nid_t fg,
                } while (time_after_eq(end_time, jiffies));
        }
 }
+EXPORT_SYMBOL_HDA(snd_hda_codec_set_power_to_all);
+
+/*
+ * set power state of the codec
+ */
+static void hda_set_power_state(struct hda_codec *codec, hda_nid_t fg,
+                               unsigned int power_state)
+{
+       if (codec->patch_ops.set_power_state) {
+               codec->patch_ops.set_power_state(codec, fg, power_state);
+               return;
+       }
+
+       /* this delay seems necessary to avoid click noise at power-down */
+       if (power_state == AC_PWRST_D3)
+               msleep(100);
+       snd_hda_codec_read(codec, fg, 0, AC_VERB_SET_POWER_STATE,
+                           power_state);
+       snd_hda_codec_set_power_to_all(codec, fg, power_state, true);
+}
 
 #ifdef CONFIG_SND_HDA_HWDEP
 /* execute additional init verbs */
@@ -3274,7 +3275,7 @@ static void hda_exec_init_verbs(struct hda_codec *codec)
 static inline void hda_exec_init_verbs(struct hda_codec *codec) {}
 #endif
 
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 /*
  * call suspend and power-down; used both from PM and power-save
  */
@@ -3315,7 +3316,7 @@ static void hda_call_codec_resume(struct hda_codec *codec)
                snd_hda_codec_resume_cache(codec);
        }
 }
-#endif /* SND_HDA_NEEDS_RESUME */
+#endif /* CONFIG_PM */
 
 
 /**
@@ -4071,9 +4072,6 @@ int snd_hda_add_new_ctls(struct hda_codec *codec,
 EXPORT_SYMBOL_HDA(snd_hda_add_new_ctls);
 
 #ifdef CONFIG_SND_HDA_POWER_SAVE
-static void hda_set_power_state(struct hda_codec *codec, hda_nid_t fg,
-                               unsigned int power_state);
-
 static void hda_power_work(struct work_struct *work)
 {
        struct hda_codec *codec =
@@ -4376,11 +4374,8 @@ void snd_hda_bus_reboot_notify(struct hda_bus *bus)
        if (!bus)
                return;
        list_for_each_entry(codec, &bus->codec_list, list) {
-#ifdef CONFIG_SND_HDA_POWER_SAVE
-               if (!codec->power_on)
-                       continue;
-#endif
-               if (codec->patch_ops.reboot_notify)
+               if (hda_codec_is_power_on(codec) &&
+                   codec->patch_ops.reboot_notify)
                        codec->patch_ops.reboot_notify(codec);
        }
 }
@@ -5079,11 +5074,10 @@ int snd_hda_suspend(struct hda_bus *bus)
        struct hda_codec *codec;
 
        list_for_each_entry(codec, &bus->codec_list, list) {
-#ifdef CONFIG_SND_HDA_POWER_SAVE
-               if (!codec->power_on)
-                       continue;
-#endif
-               hda_call_codec_suspend(codec);
+               if (hda_codec_is_power_on(codec))
+                       hda_call_codec_suspend(codec);
+               if (codec->patch_ops.post_suspend)
+                       codec->patch_ops.post_suspend(codec);
        }
        return 0;
 }
@@ -5103,6 +5097,8 @@ int snd_hda_resume(struct hda_bus *bus)
        struct hda_codec *codec;
 
        list_for_each_entry(codec, &bus->codec_list, list) {
+               if (codec->patch_ops.pre_resume)
+                       codec->patch_ops.pre_resume(codec);
                if (snd_hda_codec_needs_resume(codec))
                        hda_call_codec_resume(codec);
        }
index f465e07..755f2b0 100644 (file)
 #include <sound/pcm.h>
 #include <sound/hwdep.h>
 
-#if defined(CONFIG_PM) || defined(CONFIG_SND_HDA_POWER_SAVE)
-#define SND_HDA_NEEDS_RESUME   /* resume control code is required */
-#endif
-
 /*
  * nodes
  */
@@ -704,8 +700,12 @@ struct hda_codec_ops {
        int (*init)(struct hda_codec *codec);
        void (*free)(struct hda_codec *codec);
        void (*unsol_event)(struct hda_codec *codec, unsigned int res);
-#ifdef SND_HDA_NEEDS_RESUME
+       void (*set_power_state)(struct hda_codec *codec, hda_nid_t fg,
+                               unsigned int power_state);
+#ifdef CONFIG_PM
        int (*suspend)(struct hda_codec *codec, pm_message_t state);
+       int (*post_suspend)(struct hda_codec *codec);
+       int (*pre_resume)(struct hda_codec *codec);
        int (*resume)(struct hda_codec *codec);
 #endif
 #ifdef CONFIG_SND_HDA_POWER_SAVE
@@ -927,7 +927,7 @@ void snd_hda_sequence_write(struct hda_codec *codec,
 int snd_hda_queue_unsol_event(struct hda_bus *bus, u32 res, u32 res_ex);
 
 /* cached write */
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 int snd_hda_codec_write_cache(struct hda_codec *codec, hda_nid_t nid,
                              int direct, unsigned int verb, unsigned int parm);
 void snd_hda_sequence_write_cache(struct hda_codec *codec,
@@ -1008,6 +1008,9 @@ int snd_hda_is_supported_format(struct hda_codec *codec, hda_nid_t nid,
  */
 void snd_hda_get_codec_name(struct hda_codec *codec, char *name, int namelen);
 void snd_hda_bus_reboot_notify(struct hda_bus *bus);
+void snd_hda_codec_set_power_to_all(struct hda_codec *codec, hda_nid_t fg,
+                                   unsigned int power_state,
+                                   bool eapd_workaround);
 
 /*
  * power management
index 88b277e..2e7ac31 100644 (file)
@@ -131,7 +131,7 @@ int snd_hda_codec_amp_update(struct hda_codec *codec, hda_nid_t nid, int ch,
                             int direction, int idx, int mask, int val);
 int snd_hda_codec_amp_stereo(struct hda_codec *codec, hda_nid_t nid,
                             int dir, int idx, int mask, int val);
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 void snd_hda_codec_resume_amp(struct hda_codec *codec);
 #endif
 
index 1362c8b..8648917 100644 (file)
@@ -563,7 +563,7 @@ static void ad198x_free(struct hda_codec *codec)
        snd_hda_detach_beep_device(codec);
 }
 
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 static int ad198x_suspend(struct hda_codec *codec, pm_message_t state)
 {
        ad198x_shutup(codec);
@@ -579,7 +579,7 @@ static const struct hda_codec_ops ad198x_patch_ops = {
 #ifdef CONFIG_SND_HDA_POWER_SAVE
        .check_power_status = ad198x_check_power_status,
 #endif
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
        .suspend = ad198x_suspend,
 #endif
        .reboot_notify = ad198x_shutup,
index 7f93739..47d6ffc 100644 (file)
@@ -25,6 +25,7 @@
 #include <sound/core.h>
 #include "hda_codec.h"
 #include "hda_local.h"
+#include <sound/tlv.h>
 
 /*
  */
@@ -61,9 +62,15 @@ struct cs_spec {
 
        unsigned int hp_detect:1;
        unsigned int mic_detect:1;
+       /* CS421x */
+       unsigned int spdif_detect:1;
+       unsigned int sense_b:1;
+       hda_nid_t vendor_nid;
+       struct hda_input_mux input_mux;
+       unsigned int last_input;
 };
 
-/* available models */
+/* available models with CS420x */
 enum {
        CS420X_MBP53,
        CS420X_MBP55,
@@ -72,6 +79,12 @@ enum {
        CS420X_MODELS
 };
 
+/* CS421x boards */
+enum {
+       CS421X_CDB4210,
+       CS421X_MODELS
+};
+
 /* Vendor-specific processing widget */
 #define CS420X_VENDOR_NID      0x11
 #define CS_DIG_OUT1_PIN_NID    0x10
@@ -111,21 +124,42 @@ enum {
 /* 0x0009 - 0x0014 -> 12 test regs */
 /* 0x0015 - visibility reg */
 
+/*
+ * Cirrus Logic CS4210
+ *
+ * 1 DAC => HP(sense) / Speakers,
+ * 1 ADC <= LineIn(sense) / MicIn / DMicIn,
+ * 1 SPDIF OUT => SPDIF Transmitter(sense)
+*/
+#define CS4210_DAC_NID         0x02
+#define CS4210_ADC_NID         0x03
+#define CS421X_VENDOR_NID      0x0B
+#define CS421X_DMIC_PIN_NID    0x09 /* Port E */
+#define CS421X_SPDIF_PIN_NID   0x0A /* Port H */
+
+#define CS421X_IDX_DEV_CFG     0x01
+#define CS421X_IDX_ADC_CFG     0x02
+#define CS421X_IDX_DAC_CFG     0x03
+#define CS421X_IDX_SPK_CTL     0x04
+
+#define SPDIF_EVENT            0x04
 
 static inline int cs_vendor_coef_get(struct hda_codec *codec, unsigned int idx)
 {
-       snd_hda_codec_write(codec, CS420X_VENDOR_NID, 0,
+       struct cs_spec *spec = codec->spec;
+       snd_hda_codec_write(codec, spec->vendor_nid, 0,
                            AC_VERB_SET_COEF_INDEX, idx);
-       return snd_hda_codec_read(codec, CS420X_VENDOR_NID, 0,
+       return snd_hda_codec_read(codec, spec->vendor_nid, 0,
                                  AC_VERB_GET_PROC_COEF, 0);
 }
 
 static inline void cs_vendor_coef_set(struct hda_codec *codec, unsigned int idx,
                                      unsigned int coef)
 {
-       snd_hda_codec_write(codec, CS420X_VENDOR_NID, 0,
+       struct cs_spec *spec = codec->spec;
+       snd_hda_codec_write(codec, spec->vendor_nid, 0,
                            AC_VERB_SET_COEF_INDEX, idx);
-       snd_hda_codec_write(codec, CS420X_VENDOR_NID, 0,
+       snd_hda_codec_write(codec, spec->vendor_nid, 0,
                            AC_VERB_SET_PROC_COEF, coef);
 }
 
@@ -347,15 +381,12 @@ static hda_nid_t get_adc(struct hda_codec *codec, hda_nid_t pin,
        nid = codec->start_nid;
        for (i = 0; i < codec->num_nodes; i++, nid++) {
                unsigned int type;
-               int idx;
                type = get_wcaps_type(get_wcaps(codec, nid));
                if (type != AC_WID_AUD_IN)
                        continue;
-               idx = snd_hda_get_conn_index(codec, nid, pin, 0);
-               if (idx >= 0) {
-                       *idxp = idx;
+               *idxp = snd_hda_get_conn_index(codec, nid, pin, false);
+               if (*idxp >= 0)
                        return nid;
-               }
        }
        return 0;
 }
@@ -835,6 +866,8 @@ static int build_digital_input(struct hda_codec *codec)
 
 /*
  * auto-mute and auto-mic switching
+ * CS421x auto-output redirecting
+ * HP/SPK/SPDIF
  */
 
 static void cs_automute(struct hda_codec *codec)
@@ -842,9 +875,25 @@ static void cs_automute(struct hda_codec *codec)
        struct cs_spec *spec = codec->spec;
        struct auto_pin_cfg *cfg = &spec->autocfg;
        unsigned int hp_present;
+       unsigned int spdif_present;
        hda_nid_t nid;
        int i;
 
+       spdif_present = 0;
+       if (cfg->dig_outs) {
+               nid = cfg->dig_out_pins[0];
+               if (is_jack_detectable(codec, nid)) {
+                       /*
+                       TODO: SPDIF output redirect when SENSE_B is enabled.
+                       Shared (SENSE_A) jack (e.g HP/mini-TOSLINK)
+                       assumed.
+                       */
+                       if (snd_hda_jack_detect(codec, nid)
+                               /* && spec->sense_b */)
+                               spdif_present = 1;
+               }
+       }
+
        hp_present = 0;
        for (i = 0; i < cfg->hp_outs; i++) {
                nid = cfg->hp_pins[i];
@@ -854,11 +903,19 @@ static void cs_automute(struct hda_codec *codec)
                if (hp_present)
                        break;
        }
+
+       /* mute speakers if spdif or hp jack is plugged in */
        for (i = 0; i < cfg->speaker_outs; i++) {
                nid = cfg->speaker_pins[i];
                snd_hda_codec_write(codec, nid, 0,
                                    AC_VERB_SET_PIN_WIDGET_CONTROL,
                                    hp_present ? 0 : PIN_OUT);
+               /* detect on spdif is specific to CS421x */
+               if (spec->vendor_nid == CS421X_VENDOR_NID) {
+                       snd_hda_codec_write(codec, nid, 0,
+                                       AC_VERB_SET_PIN_WIDGET_CONTROL,
+                                       spdif_present ? 0 : PIN_OUT);
+               }
        }
        if (spec->board_config == CS420X_MBP53 ||
            spec->board_config == CS420X_MBP55 ||
@@ -867,21 +924,62 @@ static void cs_automute(struct hda_codec *codec)
                snd_hda_codec_write(codec, 0x01, 0,
                                    AC_VERB_SET_GPIO_DATA, gpio);
        }
+
+       /* specific to CS421x */
+       if (spec->vendor_nid == CS421X_VENDOR_NID) {
+               /* mute HPs if spdif jack (SENSE_B) is present */
+               for (i = 0; i < cfg->hp_outs; i++) {
+                       nid = cfg->hp_pins[i];
+                       snd_hda_codec_write(codec, nid, 0,
+                               AC_VERB_SET_PIN_WIDGET_CONTROL,
+                               (spdif_present && spec->sense_b) ? 0 : PIN_HP);
+               }
+
+               /* SPDIF TX on/off */
+               if (cfg->dig_outs) {
+                       nid = cfg->dig_out_pins[0];
+                       snd_hda_codec_write(codec, nid, 0,
+                               AC_VERB_SET_PIN_WIDGET_CONTROL,
+                               spdif_present ? PIN_OUT : 0);
+
+               }
+               /* Update board GPIOs if necessary ... */
+       }
 }
 
+/*
+ * Auto-input redirect for CS421x
+ * Switch max 3 inputs of a single ADC (nid 3)
+*/
+
 static void cs_automic(struct hda_codec *codec)
 {
        struct cs_spec *spec = codec->spec;
        struct auto_pin_cfg *cfg = &spec->autocfg;
        hda_nid_t nid;
        unsigned int present;
-       
+
        nid = cfg->inputs[spec->automic_idx].pin;
        present = snd_hda_jack_detect(codec, nid);
-       if (present)
-               change_cur_input(codec, spec->automic_idx, 0);
-       else
-               change_cur_input(codec, !spec->automic_idx, 0);
+
+       /* specific to CS421x, single ADC */
+       if (spec->vendor_nid == CS421X_VENDOR_NID) {
+               if (present) {
+                       spec->last_input = spec->cur_input;
+                       spec->cur_input = spec->automic_idx;
+               } else  {
+                       spec->cur_input = spec->last_input;
+               }
+
+               snd_hda_codec_write_cache(codec, spec->cur_adc, 0,
+                                       AC_VERB_SET_CONNECT_SEL,
+                                       spec->adc_idx[spec->cur_input]);
+       } else {
+               if (present)
+                       change_cur_input(codec, spec->automic_idx, 0);
+               else
+                       change_cur_input(codec, !spec->automic_idx, 0);
+       }
 }
 
 /*
@@ -911,23 +1009,28 @@ static void init_output(struct hda_codec *codec)
        for (i = 0; i < cfg->line_outs; i++)
                snd_hda_codec_write(codec, cfg->line_out_pins[i], 0,
                                    AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT);
+       /* HP */
        for (i = 0; i < cfg->hp_outs; i++) {
                hda_nid_t nid = cfg->hp_pins[i];
                snd_hda_codec_write(codec, nid, 0,
                                    AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP);
                if (!cfg->speaker_outs)
                        continue;
-               if (is_jack_detectable(codec, nid)) {
+               if (get_wcaps(codec, nid) & AC_WCAP_UNSOL_CAP) {
                        snd_hda_codec_write(codec, nid, 0,
                                            AC_VERB_SET_UNSOLICITED_ENABLE,
                                            AC_USRSP_EN | HP_EVENT);
                        spec->hp_detect = 1;
                }
        }
+
+       /* Speaker */
        for (i = 0; i < cfg->speaker_outs; i++)
                snd_hda_codec_write(codec, cfg->speaker_pins[i], 0,
                                    AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT);
-       if (spec->hp_detect)
+
+       /* SPDIF is enabled on presence detect for CS421x */
+       if (spec->hp_detect || spec->spdif_detect)
                cs_automute(codec);
 }
 
@@ -961,19 +1064,31 @@ static void init_input(struct hda_codec *codec)
                                            AC_VERB_SET_UNSOLICITED_ENABLE,
                                            AC_USRSP_EN | MIC_EVENT);
        }
-       change_cur_input(codec, spec->cur_input, 1);
-       if (spec->mic_detect)
-               cs_automic(codec);
-
-       coef = 0x000a; /* ADC1/2 - Digital and Analog Soft Ramp */
-       if (is_active_pin(codec, CS_DMIC2_PIN_NID))
-               coef |= 0x0500; /* DMIC2 enable 2 channels, disable GPIO1 */
-       if (is_active_pin(codec, CS_DMIC1_PIN_NID))
-               coef |= 0x1800; /* DMIC1 enable 2 channels, disable GPIO0 
-                                * No effect if SPDIF_OUT2 is selected in 
-                                * IDX_SPDIF_CTL.
-                                 */
-       cs_vendor_coef_set(codec, IDX_ADC_CFG, coef);
+       /* specific to CS421x */
+       if (spec->vendor_nid == CS421X_VENDOR_NID) {
+               if (spec->mic_detect)
+                       cs_automic(codec);
+               else  {
+                       spec->cur_adc = spec->adc_nid[spec->cur_input];
+                       snd_hda_codec_write(codec, spec->cur_adc, 0,
+                                       AC_VERB_SET_CONNECT_SEL,
+                                       spec->adc_idx[spec->cur_input]);
+               }
+       } else {
+               change_cur_input(codec, spec->cur_input, 1);
+               if (spec->mic_detect)
+                       cs_automic(codec);
+
+               coef = 0x000a; /* ADC1/2 - Digital and Analog Soft Ramp */
+               if (is_active_pin(codec, CS_DMIC2_PIN_NID))
+                       coef |= 0x0500; /* DMIC2 2 chan on, GPIO1 off */
+               if (is_active_pin(codec, CS_DMIC1_PIN_NID))
+                       coef |= 0x1800; /* DMIC1 2 chan on, GPIO0 off
+                                        * No effect if SPDIF_OUT2 is
+                                        * selected in IDX_SPDIF_CTL.
+                                       */
+               cs_vendor_coef_set(codec, IDX_ADC_CFG, coef);
+       }
 }
 
 static const struct hda_verb cs_coef_init_verbs[] = {
@@ -1221,16 +1336,16 @@ static const struct cs_pincfg *cs_pincfgs[CS420X_MODELS] = {
        [CS420X_IMAC27] = imac27_pincfgs,
 };
 
-static void fix_pincfg(struct hda_codec *codec, int model)
+static void fix_pincfg(struct hda_codec *codec, int model,
+                      const struct cs_pincfg **pin_configs)
 {
-       const struct cs_pincfg *cfg = cs_pincfgs[model];
+       const struct cs_pincfg *cfg = pin_configs[model];
        if (!cfg)
                return;
        for (; cfg->nid; cfg++)
                snd_hda_codec_set_pincfg(codec, cfg->nid, cfg->val);
 }
 
-
 static int patch_cs420x(struct hda_codec *codec)
 {
        struct cs_spec *spec;
@@ -1241,11 +1356,13 @@ static int patch_cs420x(struct hda_codec *codec)
                return -ENOMEM;
        codec->spec = spec;
 
+       spec->vendor_nid = CS420X_VENDOR_NID;
+
        spec->board_config =
                snd_hda_check_board_config(codec, CS420X_MODELS,
                                           cs420x_models, cs420x_cfg_tbl);
        if (spec->board_config >= 0)
-               fix_pincfg(codec, spec->board_config);
+               fix_pincfg(codec, spec->board_config, cs_pincfgs);
 
        switch (spec->board_config) {
        case CS420X_IMAC27:
@@ -1272,6 +1389,562 @@ static int patch_cs420x(struct hda_codec *codec)
        return err;
 }
 
+/*
+ * Cirrus Logic CS4210
+ *
+ * 1 DAC => HP(sense) / Speakers,
+ * 1 ADC <= LineIn(sense) / MicIn / DMicIn,
+ * 1 SPDIF OUT => SPDIF Transmitter(sense)
+*/
+
+/* CS4210 board names */
+static const char *cs421x_models[CS421X_MODELS] = {
+       [CS421X_CDB4210] = "cdb4210",
+};
+
+static const struct snd_pci_quirk cs421x_cfg_tbl[] = {
+       /* Test Intel board + CDB2410  */
+       SND_PCI_QUIRK(0x8086, 0x5001, "DP45SG/CDB4210", CS421X_CDB4210),
+       {} /* terminator */
+};
+
+/* CS4210 board pinconfigs */
+/* Default CS4210 (CDB4210)*/
+static const struct cs_pincfg cdb4210_pincfgs[] = {
+       { 0x05, 0x0321401f },
+       { 0x06, 0x90170010 },
+       { 0x07, 0x03813031 },
+       { 0x08, 0xb7a70037 },
+       { 0x09, 0xb7a6003e },
+       { 0x0a, 0x034510f0 },
+       {} /* terminator */
+};
+
+static const struct cs_pincfg *cs421x_pincfgs[CS421X_MODELS] = {
+       [CS421X_CDB4210] = cdb4210_pincfgs,
+};
+
+static const struct hda_verb cs421x_coef_init_verbs[] = {
+       {0x0B, AC_VERB_SET_PROC_STATE, 1},
+       {0x0B, AC_VERB_SET_COEF_INDEX, CS421X_IDX_DEV_CFG},
+       /*
+           Disable Coefficient Index Auto-Increment(DAI)=1,
+           PDREF=0
+       */
+       {0x0B, AC_VERB_SET_PROC_COEF, 0x0001 },
+
+       {0x0B, AC_VERB_SET_COEF_INDEX, CS421X_IDX_ADC_CFG},
+       /* ADC SZCMode = Digital Soft Ramp */
+       {0x0B, AC_VERB_SET_PROC_COEF, 0x0002 },
+
+       {0x0B, AC_VERB_SET_COEF_INDEX, CS421X_IDX_DAC_CFG},
+       {0x0B, AC_VERB_SET_PROC_COEF,
+        (0x0002 /* DAC SZCMode = Digital Soft Ramp */
+         | 0x0004 /* Mute DAC on FIFO error */
+         | 0x0008 /* Enable DAC High Pass Filter */
+         )},
+       {} /* terminator */
+};
+
+/* Errata: CS4210 rev A1 Silicon
+ *
+ * http://www.cirrus.com/en/pubs/errata/
+ *
+ * Description:
+ * 1. Performance degradation is present in the ADC.
+ * 2. Speaker output is not completely muted upon HP detect.
+ * 3. Noise is present when clipping occurs on the amplified
+ *    speaker outputs.
+ *
+ * Workaround:
+ * The following verb sequence written to the registers during
+ * initialization will correct the issues listed above.
+ */
+
+static const struct hda_verb cs421x_coef_init_verbs_A1_silicon_fixes[] = {
+       {0x0B, AC_VERB_SET_PROC_STATE, 0x01},  /* VPW: processing on */
+
+       {0x0B, AC_VERB_SET_COEF_INDEX, 0x0006},
+       {0x0B, AC_VERB_SET_PROC_COEF, 0x9999}, /* Test mode: on */
+
+       {0x0B, AC_VERB_SET_COEF_INDEX, 0x000A},
+       {0x0B, AC_VERB_SET_PROC_COEF, 0x14CB}, /* Chop double */
+
+       {0x0B, AC_VERB_SET_COEF_INDEX, 0x0011},
+       {0x0B, AC_VERB_SET_PROC_COEF, 0xA2D0}, /* Increase ADC current */
+
+       {0x0B, AC_VERB_SET_COEF_INDEX, 0x001A},
+       {0x0B, AC_VERB_SET_PROC_COEF, 0x02A9}, /* Mute speaker */
+
+       {0x0B, AC_VERB_SET_COEF_INDEX, 0x001B},
+       {0x0B, AC_VERB_SET_PROC_COEF, 0X1006}, /* Remove noise */
+
+       {} /* terminator */
+};
+
+/* Speaker Amp Gain is controlled by the vendor widget's coef 4 */
+static const DECLARE_TLV_DB_SCALE(cs421x_speaker_boost_db_scale, 900, 300, 0);
+
+static int cs421x_boost_vol_info(struct snd_kcontrol *kcontrol,
+                               struct snd_ctl_elem_info *uinfo)
+{
+       uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+       uinfo->count = 1;
+       uinfo->value.integer.min = 0;
+       uinfo->value.integer.max = 3;
+       return 0;
+}
+
+static int cs421x_boost_vol_get(struct snd_kcontrol *kcontrol,
+                               struct snd_ctl_elem_value *ucontrol)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+
+       ucontrol->value.integer.value[0] =
+               cs_vendor_coef_get(codec, CS421X_IDX_SPK_CTL) & 0x0003;
+       return 0;
+}
+
+static int cs421x_boost_vol_put(struct snd_kcontrol *kcontrol,
+                               struct snd_ctl_elem_value *ucontrol)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+
+       unsigned int vol = ucontrol->value.integer.value[0];
+       unsigned int coef =
+               cs_vendor_coef_get(codec, CS421X_IDX_SPK_CTL);
+       unsigned int original_coef = coef;
+
+       coef &= ~0x0003;
+       coef |= (vol & 0x0003);
+       if (original_coef == coef)
+               return 0;
+       else {
+               cs_vendor_coef_set(codec, CS421X_IDX_SPK_CTL, coef);
+               return 1;
+       }
+}
+
+static const struct snd_kcontrol_new cs421x_speaker_bost_ctl = {
+
+       .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+       .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+                       SNDRV_CTL_ELEM_ACCESS_TLV_READ),
+       .name = "Speaker Boost Playback Volume",
+       .info = cs421x_boost_vol_info,
+       .get = cs421x_boost_vol_get,
+       .put = cs421x_boost_vol_put,
+       .tlv = { .p = cs421x_speaker_boost_db_scale },
+};
+
+static void cs421x_pinmux_init(struct hda_codec *codec)
+{
+       struct cs_spec *spec = codec->spec;
+       unsigned int def_conf, coef;
+
+       /* GPIO, DMIC_SCL, DMIC_SDA and SENSE_B are multiplexed */
+       coef = cs_vendor_coef_get(codec, CS421X_IDX_DEV_CFG);
+
+       if (spec->gpio_mask)
+               coef |= 0x0008; /* B1,B2 are GPIOs */
+       else
+               coef &= ~0x0008;
+
+       if (spec->sense_b)
+               coef |= 0x0010; /* B2 is SENSE_B, not inverted  */
+       else
+               coef &= ~0x0010;
+
+       cs_vendor_coef_set(codec, CS421X_IDX_DEV_CFG, coef);
+
+       if ((spec->gpio_mask || spec->sense_b) &&
+           is_active_pin(codec, CS421X_DMIC_PIN_NID)) {
+
+               /*
+                   GPIO or SENSE_B forced - disconnect the DMIC pin.
+               */
+               def_conf = snd_hda_codec_get_pincfg(codec, CS421X_DMIC_PIN_NID);
+               def_conf &= ~AC_DEFCFG_PORT_CONN;
+               def_conf |= (AC_JACK_PORT_NONE << AC_DEFCFG_PORT_CONN_SHIFT);
+               snd_hda_codec_set_pincfg(codec, CS421X_DMIC_PIN_NID, def_conf);
+       }
+}
+
+static void init_cs421x_digital(struct hda_codec *codec)
+{
+       struct cs_spec *spec = codec->spec;
+       struct auto_pin_cfg *cfg = &spec->autocfg;
+       int i;
+
+
+       for (i = 0; i < cfg->dig_outs; i++) {
+               hda_nid_t nid = cfg->dig_out_pins[i];
+               if (!cfg->speaker_outs)
+                       continue;
+               if (get_wcaps(codec, nid) & AC_WCAP_UNSOL_CAP) {
+
+                       snd_hda_codec_write(codec, nid, 0,
+                                   AC_VERB_SET_UNSOLICITED_ENABLE,
+                                   AC_USRSP_EN | SPDIF_EVENT);
+                       spec->spdif_detect = 1;
+               }
+       }
+}
+
+static int cs421x_init(struct hda_codec *codec)
+{
+       struct cs_spec *spec = codec->spec;
+
+       snd_hda_sequence_write(codec, cs421x_coef_init_verbs);
+       snd_hda_sequence_write(codec, cs421x_coef_init_verbs_A1_silicon_fixes);
+
+       cs421x_pinmux_init(codec);
+
+       if (spec->gpio_mask) {
+               snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_MASK,
+                                   spec->gpio_mask);
+               snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DIRECTION,
+                                   spec->gpio_dir);
+               snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DATA,
+                                   spec->gpio_data);
+       }
+
+       init_output(codec);
+       init_input(codec);
+       init_cs421x_digital(codec);
+
+       return 0;
+}
+
+/*
+ * CS4210 Input MUX (1 ADC)
+ */
+static int cs421x_mux_enum_info(struct snd_kcontrol *kcontrol,
+                                       struct snd_ctl_elem_info *uinfo)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       struct cs_spec *spec = codec->spec;
+
+       return snd_hda_input_mux_info(&spec->input_mux, uinfo);
+}
+
+static int cs421x_mux_enum_get(struct snd_kcontrol *kcontrol,
+                                       struct snd_ctl_elem_value *ucontrol)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       struct cs_spec *spec = codec->spec;
+
+       ucontrol->value.enumerated.item[0] = spec->cur_input;
+       return 0;
+}
+
+static int cs421x_mux_enum_put(struct snd_kcontrol *kcontrol,
+                                       struct snd_ctl_elem_value *ucontrol)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       struct cs_spec *spec = codec->spec;
+
+       return snd_hda_input_mux_put(codec, &spec->input_mux, ucontrol,
+                               spec->adc_nid[0], &spec->cur_input);
+
+}
+
+static struct snd_kcontrol_new cs421x_capture_source = {
+
+       .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+       .name = "Capture Source",
+       .access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+       .info = cs421x_mux_enum_info,
+       .get = cs421x_mux_enum_get,
+       .put = cs421x_mux_enum_put,
+};
+
+static int cs421x_add_input_volume_control(struct hda_codec *codec, int item)
+{
+       struct cs_spec *spec = codec->spec;
+       struct auto_pin_cfg *cfg = &spec->autocfg;
+       const struct hda_input_mux *imux = &spec->input_mux;
+       hda_nid_t pin = cfg->inputs[item].pin;
+       struct snd_kcontrol *kctl;
+       u32 caps;
+
+       if (!(get_wcaps(codec, pin) & AC_WCAP_IN_AMP))
+               return 0;
+
+       caps = query_amp_caps(codec, pin, HDA_INPUT);
+       caps = (caps & AC_AMPCAP_NUM_STEPS) >> AC_AMPCAP_NUM_STEPS_SHIFT;
+       if (caps <= 1)
+               return 0;
+
+       return add_volume(codec,  imux->items[item].label, 0,
+                         HDA_COMPOSE_AMP_VAL(pin, 3, 0, HDA_INPUT), 1, &kctl);
+}
+
+/* add a (input-boost) volume control to the given input pin */
+static int build_cs421x_input(struct hda_codec *codec)
+{
+       struct cs_spec *spec = codec->spec;
+       struct auto_pin_cfg *cfg = &spec->autocfg;
+       struct hda_input_mux *imux = &spec->input_mux;
+       int i, err, type_idx;
+       const char *label;
+
+       if (!spec->num_inputs)
+               return 0;
+
+       /* make bind-capture */
+       spec->capture_bind[0] = make_bind_capture(codec, &snd_hda_bind_sw);
+       spec->capture_bind[1] = make_bind_capture(codec, &snd_hda_bind_vol);
+       for (i = 0; i < 2; i++) {
+               struct snd_kcontrol *kctl;
+               int n;
+               if (!spec->capture_bind[i])
+                       return -ENOMEM;
+               kctl = snd_ctl_new1(&cs_capture_ctls[i], codec);
+               if (!kctl)
+                       return -ENOMEM;
+               kctl->private_value = (long)spec->capture_bind[i];
+               err = snd_hda_ctl_add(codec, 0, kctl);
+               if (err < 0)
+                       return err;
+               for (n = 0; n < AUTO_PIN_LAST; n++) {
+                       if (!spec->adc_nid[n])
+                               continue;
+                       err = snd_hda_add_nid(codec, kctl, 0, spec->adc_nid[n]);
+                       if (err < 0)
+                               return err;
+               }
+       }
+
+       /* Add Input MUX Items + Capture Volume/Switch */
+       for (i = 0; i < spec->num_inputs; i++) {
+               label = hda_get_autocfg_input_label(codec, cfg, i);
+               snd_hda_add_imux_item(imux, label, spec->adc_idx[i], &type_idx);
+
+               err = cs421x_add_input_volume_control(codec, i);
+               if (err < 0)
+                       return err;
+       }
+
+       /*
+           Add 'Capture Source' Switch if
+               * 2 inputs and no mic detect
+               * 3 inputs
+       */
+       if ((spec->num_inputs == 2 && !spec->mic_detect) ||
+           (spec->num_inputs == 3)) {
+
+               err = snd_hda_ctl_add(codec, spec->adc_nid[0],
+                             snd_ctl_new1(&cs421x_capture_source, codec));
+               if (err < 0)
+                       return err;
+       }
+
+       return 0;
+}
+
+/* Single DAC (Mute/Gain) */
+static int build_cs421x_output(struct hda_codec *codec)
+{
+       hda_nid_t dac = CS4210_DAC_NID;
+       struct cs_spec *spec = codec->spec;
+       struct auto_pin_cfg *cfg = &spec->autocfg;
+       struct snd_kcontrol *kctl;
+       int err;
+       char *name = "HP/Speakers";
+
+       fix_volume_caps(codec, dac);
+       if (!spec->vmaster_sw) {
+               err = add_vmaster(codec, dac);
+               if (err < 0)
+                       return err;
+       }
+
+       err = add_mute(codec, name, 0,
+                       HDA_COMPOSE_AMP_VAL(dac, 3, 0, HDA_OUTPUT), 0, &kctl);
+       if (err < 0)
+               return err;
+       err = snd_ctl_add_slave(spec->vmaster_sw, kctl);
+       if (err < 0)
+               return err;
+
+       err = add_volume(codec, name, 0,
+                       HDA_COMPOSE_AMP_VAL(dac, 3, 0, HDA_OUTPUT), 0, &kctl);
+       if (err < 0)
+               return err;
+       err = snd_ctl_add_slave(spec->vmaster_vol, kctl);
+       if (err < 0)
+               return err;
+
+       if (cfg->speaker_outs) {
+               err = snd_hda_ctl_add(codec, 0,
+                       snd_ctl_new1(&cs421x_speaker_bost_ctl, codec));
+               if (err < 0)
+                       return err;
+       }
+       return err;
+}
+
+static int cs421x_build_controls(struct hda_codec *codec)
+{
+       int err;
+
+       err = build_cs421x_output(codec);
+       if (err < 0)
+               return err;
+       err = build_cs421x_input(codec);
+       if (err < 0)
+               return err;
+       err = build_digital_output(codec);
+       if (err < 0)
+               return err;
+       return cs421x_init(codec);
+}
+
+static void cs421x_unsol_event(struct hda_codec *codec, unsigned int res)
+{
+       switch ((res >> 26) & 0x3f) {
+       case HP_EVENT:
+       case SPDIF_EVENT:
+               cs_automute(codec);
+               break;
+
+       case MIC_EVENT:
+               cs_automic(codec);
+               break;
+       }
+}
+
+static int parse_cs421x_input(struct hda_codec *codec)
+{
+       struct cs_spec *spec = codec->spec;
+       struct auto_pin_cfg *cfg = &spec->autocfg;
+       int i;
+
+       for (i = 0; i < cfg->num_inputs; i++) {
+               hda_nid_t pin = cfg->inputs[i].pin;
+               spec->adc_nid[i] = get_adc(codec, pin, &spec->adc_idx[i]);
+               spec->cur_input = spec->last_input = i;
+               spec->num_inputs++;
+
+               /* check whether the automatic mic switch is available */
+               if (is_ext_mic(codec, i) && cfg->num_inputs >= 2) {
+                       spec->mic_detect = 1;
+                       spec->automic_idx = i;
+               }
+       }
+       return 0;
+}
+
+static int cs421x_parse_auto_config(struct hda_codec *codec)
+{
+       struct cs_spec *spec = codec->spec;
+       int err;
+
+       err = snd_hda_parse_pin_def_config(codec, &spec->autocfg, NULL);
+       if (err < 0)
+               return err;
+       err = parse_output(codec);
+       if (err < 0)
+               return err;
+       err = parse_cs421x_input(codec);
+       if (err < 0)
+               return err;
+       err = parse_digital_output(codec);
+       if (err < 0)
+               return err;
+       return 0;
+}
+
+#ifdef CONFIG_PM
+/*
+       Manage PDREF, when transitioning to D3hot
+       (DAC,ADC) -> D3, PDREF=1, AFG->D3
+*/
+static int cs421x_suspend(struct hda_codec *codec, pm_message_t state)
+{
+       unsigned int coef;
+
+       snd_hda_shutup_pins(codec);
+
+       snd_hda_codec_write(codec, CS4210_DAC_NID, 0,
+                           AC_VERB_SET_POWER_STATE,  AC_PWRST_D3);
+       snd_hda_codec_write(codec, CS4210_ADC_NID, 0,
+                           AC_VERB_SET_POWER_STATE,  AC_PWRST_D3);
+
+       coef = cs_vendor_coef_get(codec, CS421X_IDX_DEV_CFG);
+       coef |= 0x0004; /* PDREF */
+       cs_vendor_coef_set(codec, CS421X_IDX_DEV_CFG, coef);
+
+       return 0;
+}
+#endif
+
+static struct hda_codec_ops cs4210_patch_ops = {
+       .build_controls = cs421x_build_controls,
+       .build_pcms = cs_build_pcms,
+       .init = cs421x_init,
+       .free = cs_free,
+       .unsol_event = cs421x_unsol_event,
+#ifdef CONFIG_PM
+       .suspend = cs421x_suspend,
+#endif
+};
+
+static int patch_cs421x(struct hda_codec *codec)
+{
+       struct cs_spec *spec;
+       int err;
+
+       spec = kzalloc(sizeof(*spec), GFP_KERNEL);
+       if (!spec)
+               return -ENOMEM;
+       codec->spec = spec;
+
+       spec->vendor_nid = CS421X_VENDOR_NID;
+
+       spec->board_config =
+               snd_hda_check_board_config(codec, CS421X_MODELS,
+                                          cs421x_models, cs421x_cfg_tbl);
+       if (spec->board_config >= 0)
+               fix_pincfg(codec, spec->board_config, cs421x_pincfgs);
+       /*
+           Setup GPIO/SENSE for each board (if used)
+       */
+       switch (spec->board_config) {
+       case CS421X_CDB4210:
+               snd_printd("CS4210 board: %s\n",
+                       cs421x_models[spec->board_config]);
+/*             spec->gpio_mask = 3;
+               spec->gpio_dir = 3;
+               spec->gpio_data = 3;
+*/
+               spec->sense_b = 1;
+
+               break;
+       }
+
+       /*
+           Update the GPIO/DMIC/SENSE_B pinmux before the configuration
+           is auto-parsed. If GPIO or SENSE_B is forced, DMIC input
+           is disabled.
+       */
+       cs421x_pinmux_init(codec);
+
+       err = cs421x_parse_auto_config(codec);
+       if (err < 0)
+               goto error;
+
+       codec->patch_ops = cs4210_patch_ops;
+
+       return 0;
+
+ error:
+       kfree(codec->spec);
+       codec->spec = NULL;
+       return err;
+}
+
 
 /*
  * patch entries
@@ -1279,11 +1952,13 @@ static int patch_cs420x(struct hda_codec *codec)
 static const struct hda_codec_preset snd_hda_preset_cirrus[] = {
        { .id = 0x10134206, .name = "CS4206", .patch = patch_cs420x },
        { .id = 0x10134207, .name = "CS4207", .patch = patch_cs420x },
+       { .id = 0x10134210, .name = "CS4210", .patch = patch_cs421x },
        {} /* terminator */
 };
 
 MODULE_ALIAS("snd-hda-codec-id:10134206");
 MODULE_ALIAS("snd-hda-codec-id:10134207");
+MODULE_ALIAS("snd-hda-codec-id:10134210");
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Cirrus Logic HD-audio codec");
index 884f67b..502fc94 100644 (file)
@@ -446,6 +446,19 @@ static int conexant_init_jacks(struct hda_codec *codec)
        return 0;
 }
 
+static void conexant_set_power(struct hda_codec *codec, hda_nid_t fg,
+                              unsigned int power_state)
+{
+       if (power_state == AC_PWRST_D3)
+               msleep(100);
+       snd_hda_codec_read(codec, fg, 0, AC_VERB_SET_POWER_STATE,
+                           power_state);
+       /* partial workaround for "azx_get_response timeout" */
+       if (power_state == AC_PWRST_D0)
+               msleep(10);
+       snd_hda_codec_set_power_to_all(codec, fg, power_state, true);
+}
+
 static int conexant_init(struct hda_codec *codec)
 {
        struct conexant_spec *spec = codec->spec;
@@ -588,6 +601,7 @@ static const struct hda_codec_ops conexant_patch_ops = {
        .build_pcms = conexant_build_pcms,
        .init = conexant_init,
        .free = conexant_free,
+       .set_power_state = conexant_set_power,
 #ifdef CONFIG_SND_HDA_POWER_SAVE
        .suspend = conexant_suspend,
 #endif
index 52ce075..e125c60 100644 (file)
@@ -895,13 +895,15 @@ static void alc_init_auto_hp(struct hda_codec *codec)
        if (present == 3)
                spec->automute_hp_lo = 1; /* both HP and LO automute */
 
-       if (!cfg->speaker_pins[0]) {
+       if (!cfg->speaker_pins[0] &&
+           cfg->line_out_type == AUTO_PIN_SPEAKER_OUT) {
                memcpy(cfg->speaker_pins, cfg->line_out_pins,
                       sizeof(cfg->speaker_pins));
                cfg->speaker_outs = cfg->line_outs;
        }
 
-       if (!cfg->hp_pins[0]) {
+       if (!cfg->hp_pins[0] &&
+           cfg->line_out_type == AUTO_PIN_HP_OUT) {
                memcpy(cfg->hp_pins, cfg->line_out_pins,
                       sizeof(cfg->hp_pins));
                cfg->hp_outs = cfg->line_outs;
@@ -920,6 +922,7 @@ static void alc_init_auto_hp(struct hda_codec *codec)
                spec->automute_mode = ALC_AUTOMUTE_PIN;
        }
        if (spec->automute && cfg->line_out_pins[0] &&
+           cfg->speaker_pins[0] &&
            cfg->line_out_pins[0] != cfg->hp_pins[0] &&
            cfg->line_out_pins[0] != cfg->speaker_pins[0]) {
                for (i = 0; i < cfg->line_outs; i++) {
@@ -1911,7 +1914,7 @@ static int alc_build_controls(struct hda_codec *codec)
                                return err;
                }
        }
-       if (spec->cap_mixer) {
+       if (spec->cap_mixer && spec->adc_nids) {
                const char *kname = kctl ? kctl->id.name : NULL;
                for (knew = spec->cap_mixer; knew->name; knew++) {
                        if (kname && strcmp(knew->name, kname) == 0)
@@ -2386,7 +2389,7 @@ static int alc_suspend(struct hda_codec *codec, pm_message_t state)
 }
 #endif
 
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 static int alc_resume(struct hda_codec *codec)
 {
        msleep(150); /* to avoid pop noise */
@@ -2406,7 +2409,7 @@ static const struct hda_codec_ops alc_patch_ops = {
        .init = alc_init,
        .free = alc_free,
        .unsol_event = alc_unsol_event,
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
        .resume = alc_resume,
 #endif
 #ifdef CONFIG_SND_HDA_POWER_SAVE
@@ -2801,7 +2804,8 @@ static int alc_auto_fill_dac_nids(struct hda_codec *codec)
        int i;
 
  again:
-       spec->multiout.num_dacs = 0;
+       /* set num_dacs once to full for alc_auto_look_for_dac() */
+       spec->multiout.num_dacs = cfg->line_outs;
        spec->multiout.hp_nid = 0;
        spec->multiout.extra_out_nid[0] = 0;
        memset(spec->private_dac_nids, 0, sizeof(spec->private_dac_nids));
@@ -2834,6 +2838,8 @@ static int alc_auto_fill_dac_nids(struct hda_codec *codec)
                }
        }
 
+       /* re-count num_dacs and squash invalid entries */
+       spec->multiout.num_dacs = 0;
        for (i = 0; i < cfg->line_outs; i++) {
                if (spec->private_dac_nids[i])
                        spec->multiout.num_dacs++;
@@ -3674,7 +3680,7 @@ static int patch_alc880(struct hda_codec *codec)
        if (board_config != ALC_MODEL_AUTO)
                setup_preset(codec, &alc880_presets[board_config]);
 
-       if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+       if (!spec->no_analog && !spec->adc_nids) {
                alc_auto_fill_adc_caps(codec);
                alc_rebuild_imux_for_auto_mic(codec);
                alc_remove_invalid_adc_nids(codec);
@@ -3801,7 +3807,7 @@ static int patch_alc260(struct hda_codec *codec)
        if (board_config != ALC_MODEL_AUTO)
                setup_preset(codec, &alc260_presets[board_config]);
 
-       if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+       if (!spec->no_analog && !spec->adc_nids) {
                alc_auto_fill_adc_caps(codec);
                alc_rebuild_imux_for_auto_mic(codec);
                alc_remove_invalid_adc_nids(codec);
@@ -3980,7 +3986,7 @@ static int patch_alc882(struct hda_codec *codec)
        if (board_config != ALC_MODEL_AUTO)
                setup_preset(codec, &alc882_presets[board_config]);
 
-       if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+       if (!spec->no_analog && !spec->adc_nids) {
                alc_auto_fill_adc_caps(codec);
                alc_rebuild_imux_for_auto_mic(codec);
                alc_remove_invalid_adc_nids(codec);
@@ -4134,7 +4140,7 @@ static int patch_alc262(struct hda_codec *codec)
        if (board_config != ALC_MODEL_AUTO)
                setup_preset(codec, &alc262_presets[board_config]);
 
-       if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+       if (!spec->no_analog && !spec->adc_nids) {
                alc_auto_fill_adc_caps(codec);
                alc_rebuild_imux_for_auto_mic(codec);
                alc_remove_invalid_adc_nids(codec);
@@ -4290,7 +4296,7 @@ static int patch_alc268(struct hda_codec *codec)
                                          (0 << AC_AMPCAP_MUTE_SHIFT));
        }
 
-       if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+       if (!spec->no_analog && !spec->adc_nids) {
                alc_auto_fill_adc_caps(codec);
                alc_rebuild_imux_for_auto_mic(codec);
                alc_remove_invalid_adc_nids(codec);
@@ -4410,7 +4416,7 @@ static void alc269_shutup(struct hda_codec *codec)
        }
 }
 
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 static int alc269_resume(struct hda_codec *codec)
 {
        if ((alc_read_coef_idx(codec, 0) & 0x00ff) == 0x018) {
@@ -4433,7 +4439,7 @@ static int alc269_resume(struct hda_codec *codec)
        hda_call_check_power_status(codec, 0x01);
        return 0;
 }
-#endif /* SND_HDA_NEEDS_RESUME */
+#endif /* CONFIG_PM */
 
 static void alc269_fixup_hweq(struct hda_codec *codec,
                               const struct alc_fixup *fix, int action)
@@ -4702,7 +4708,7 @@ static int patch_alc269(struct hda_codec *codec)
        if (board_config != ALC_MODEL_AUTO)
                setup_preset(codec, &alc269_presets[board_config]);
 
-       if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+       if (!spec->no_analog && !spec->adc_nids) {
                alc_auto_fill_adc_caps(codec);
                alc_rebuild_imux_for_auto_mic(codec);
                alc_remove_invalid_adc_nids(codec);
@@ -4725,7 +4731,7 @@ static int patch_alc269(struct hda_codec *codec)
        spec->vmaster_nid = 0x02;
 
        codec->patch_ops = alc_patch_ops;
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
        codec->patch_ops.resume = alc269_resume;
 #endif
        if (board_config == ALC_MODEL_AUTO)
@@ -4840,7 +4846,7 @@ static int patch_alc861(struct hda_codec *codec)
        if (board_config != ALC_MODEL_AUTO)
                setup_preset(codec, &alc861_presets[board_config]);
 
-       if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+       if (!spec->no_analog && !spec->adc_nids) {
                alc_auto_fill_adc_caps(codec);
                alc_rebuild_imux_for_auto_mic(codec);
                alc_remove_invalid_adc_nids(codec);
@@ -4981,7 +4987,7 @@ static int patch_alc861vd(struct hda_codec *codec)
                add_verb(spec, alc660vd_eapd_verbs);
        }
 
-       if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+       if (!spec->no_analog && !spec->adc_nids) {
                alc_auto_fill_adc_caps(codec);
                alc_rebuild_imux_for_auto_mic(codec);
                alc_remove_invalid_adc_nids(codec);
@@ -5197,7 +5203,7 @@ static int patch_alc662(struct hda_codec *codec)
        if (board_config != ALC_MODEL_AUTO)
                setup_preset(codec, &alc662_presets[board_config]);
 
-       if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+       if (!spec->no_analog && !spec->adc_nids) {
                alc_auto_fill_adc_caps(codec);
                alc_rebuild_imux_for_auto_mic(codec);
                alc_remove_invalid_adc_nids(codec);
@@ -5333,7 +5339,7 @@ static int patch_alc680(struct hda_codec *codec)
 #endif
        }
 
-       if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+       if (!spec->no_analog && !spec->adc_nids) {
                alc_auto_fill_adc_caps(codec);
                alc_rebuild_imux_for_auto_mic(codec);
                alc_remove_invalid_adc_nids(codec);
index 56425a5..aa376b5 100644 (file)
@@ -95,6 +95,7 @@ enum {
        STAC_92HD83XXX_PWR_REF,
        STAC_DELL_S14,
        STAC_92HD83XXX_HP,
+       STAC_92HD83XXX_HP_cNB11_INTQUAD,
        STAC_HP_DV7_4000,
        STAC_92HD83XXX_MODELS
 };
@@ -212,6 +213,7 @@ struct sigmatel_spec {
        unsigned int gpio_mute;
        unsigned int gpio_led;
        unsigned int gpio_led_polarity;
+       unsigned int vref_led;
 
        /* stream */
        unsigned int stream_delay;
@@ -671,6 +673,30 @@ static int stac92xx_smux_enum_put(struct snd_kcontrol *kcontrol,
        return 0;
 }
 
+static int stac_vrefout_set(struct hda_codec *codec,
+                                       hda_nid_t nid, unsigned int new_vref)
+{
+       int error, pinctl;
+
+       snd_printdd("%s, nid %x ctl %x\n", __func__, nid, new_vref);
+       pinctl = snd_hda_codec_read(codec, nid, 0,
+                               AC_VERB_GET_PIN_WIDGET_CONTROL, 0);
+
+       if (pinctl < 0)
+               return pinctl;
+
+       pinctl &= 0xff;
+       pinctl &= ~AC_PINCTL_VREFEN;
+       pinctl |= (new_vref & AC_PINCTL_VREFEN);
+
+       error = snd_hda_codec_write_cache(codec, nid, 0,
+                                       AC_VERB_SET_PIN_WIDGET_CONTROL, pinctl);
+       if (error < 0)
+               return error;
+
+       return 1;
+}
+
 static unsigned int stac92xx_vref_set(struct hda_codec *codec,
                                        hda_nid_t nid, unsigned int new_vref)
 {
@@ -1636,10 +1662,17 @@ static const unsigned int hp_dv7_4000_pin_configs[10] = {
        0x40f000f0, 0x40f000f0,
 };
 
+static const unsigned int hp_cNB11_intquad_pin_configs[10] = {
+       0x40f000f0, 0x0221101f, 0x02a11020, 0x92170110,
+       0x40f000f0, 0x92170110, 0x40f000f0, 0xd5a30130,
+       0x40f000f0, 0x40f000f0,
+};
+
 static const unsigned int *stac92hd83xxx_brd_tbl[STAC_92HD83XXX_MODELS] = {
        [STAC_92HD83XXX_REF] = ref92hd83xxx_pin_configs,
        [STAC_92HD83XXX_PWR_REF] = ref92hd83xxx_pin_configs,
        [STAC_DELL_S14] = dell_s14_pin_configs,
+       [STAC_92HD83XXX_HP_cNB11_INTQUAD] = hp_cNB11_intquad_pin_configs,
        [STAC_HP_DV7_4000] = hp_dv7_4000_pin_configs,
 };
 
@@ -1649,6 +1682,7 @@ static const char * const stac92hd83xxx_models[STAC_92HD83XXX_MODELS] = {
        [STAC_92HD83XXX_PWR_REF] = "mic-ref",
        [STAC_DELL_S14] = "dell-s14",
        [STAC_92HD83XXX_HP] = "hp",
+       [STAC_92HD83XXX_HP_cNB11_INTQUAD] = "hp_cNB11_intquad",
        [STAC_HP_DV7_4000] = "hp-dv7-4000",
 };
 
@@ -1661,7 +1695,47 @@ static const struct snd_pci_quirk stac92hd83xxx_cfg_tbl[] = {
        SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x02ba,
                      "unknown Dell", STAC_DELL_S14),
        SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_HP, 0xff00, 0x3600,
-                     "HP", STAC_92HD83XXX_HP),
+                         "HP", STAC_92HD83XXX_HP),
+       SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1656,
+                         "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+       SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1657,
+                         "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+       SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1658,
+                         "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+       SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1659,
+                         "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+       SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x165A,
+                         "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+       SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x165B,
+                         "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+       SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3388,
+                         "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+       SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3389,
+                         "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+       SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x355B,
+                         "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+       SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x355C,
+                         "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+       SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x355D,
+                         "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+       SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x355E,
+                         "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+       SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x355F,
+                         "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+       SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3560,
+                         "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+       SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x358B,
+                         "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+       SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x358C,
+                         "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+       SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x358D,
+                         "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+       SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3591,
+                         "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+       SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3592,
+                         "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+       SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3593,
+                         "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
        {} /* terminator */
 };
 
@@ -4020,6 +4094,8 @@ static void stac_gpio_set(struct hda_codec *codec, unsigned int mask,
 {
        unsigned int gpiostate, gpiomask, gpiodir;
 
+       snd_printdd("%s msk %x dir %x gpio %x\n", __func__, mask, dir_mask, data);
+
        gpiostate = snd_hda_codec_read(codec, codec->afg, 0,
                                       AC_VERB_GET_GPIO_DATA, 0);
        gpiostate = (gpiostate & ~dir_mask) | (data & dir_mask);
@@ -4209,10 +4285,12 @@ static void stac_store_hints(struct hda_codec *codec)
                spec->eapd_switch = val;
        get_int_hint(codec, "gpio_led_polarity", &spec->gpio_led_polarity);
        if (get_int_hint(codec, "gpio_led", &spec->gpio_led)) {
-               spec->gpio_mask |= spec->gpio_led;
-               spec->gpio_dir |= spec->gpio_led;
-               if (spec->gpio_led_polarity)
-                       spec->gpio_data |= spec->gpio_led;
+               if (spec->gpio_led <= 8) {
+                       spec->gpio_mask |= spec->gpio_led;
+                       spec->gpio_dir |= spec->gpio_led;
+                       if (spec->gpio_led_polarity)
+                               spec->gpio_data |= spec->gpio_led;
+               }
        }
 }
 
@@ -4382,11 +4460,26 @@ static void stac92xx_free_kctls(struct hda_codec *codec)
        snd_array_free(&spec->kctls);
 }
 
+static void stac92xx_shutup_pins(struct hda_codec *codec)
+{
+       unsigned int i, def_conf;
+
+       if (codec->bus->shutdown)
+               return;
+       for (i = 0; i < codec->init_pins.used; i++) {
+               struct hda_pincfg *pin = snd_array_elem(&codec->init_pins, i);
+               def_conf = snd_hda_codec_get_pincfg(codec, pin->nid);
+               if (get_defcfg_connect(def_conf) != AC_JACK_PORT_NONE)
+                       snd_hda_codec_write(codec, pin->nid, 0,
+                                   AC_VERB_SET_PIN_WIDGET_CONTROL, 0);
+       }
+}
+
 static void stac92xx_shutup(struct hda_codec *codec)
 {
        struct sigmatel_spec *spec = codec->spec;
 
-       snd_hda_shutup_pins(codec);
+       stac92xx_shutup_pins(codec);
 
        if (spec->eapd_mask)
                stac_gpio_set(codec, spec->gpio_mask,
@@ -4784,10 +4877,11 @@ static int find_mute_led_gpio(struct hda_codec *codec, int default_polarity)
        if ((codec->subsystem_id >> 16) == PCI_VENDOR_ID_HP) {
                while ((dev = dmi_find_device(DMI_DEV_TYPE_OEM_STRING,
                                                                NULL, dev))) {
-                       if (sscanf(dev->name, "HP_Mute_LED_%d_%d",
+                       if (sscanf(dev->name, "HP_Mute_LED_%d_%x",
                                  &spec->gpio_led_polarity,
                                  &spec->gpio_led) == 2) {
-                               spec->gpio_led = 1 << spec->gpio_led;
+                               if (spec->gpio_led < 4)
+                                       spec->gpio_led = 1 << spec->gpio_led;
                                return 1;
                        }
                        if (sscanf(dev->name, "HP_Mute_LED_%d",
@@ -4885,7 +4979,7 @@ static void stac927x_proc_hook(struct snd_info_buffer *buffer,
 #define stac927x_proc_hook     NULL
 #endif
 
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 static int stac92xx_resume(struct hda_codec *codec)
 {
        struct sigmatel_spec *spec = codec->spec;
@@ -4901,29 +4995,81 @@ static int stac92xx_resume(struct hda_codec *codec)
                        stac_issue_unsol_event(codec,
                                               spec->autocfg.line_out_pins[0]);
        }
+       return 0;
+}
+
+static int stac92xx_suspend(struct hda_codec *codec, pm_message_t state)
+{
+       stac92xx_shutup(codec);
+       return 0;
+}
+
+#ifdef CONFIG_SND_HDA_POWER_SAVE
+static int stac92xx_pre_resume(struct hda_codec *codec)
+{
+       struct sigmatel_spec *spec = codec->spec;
+
        /* sync mute LED */
-       if (spec->gpio_led)
-               hda_call_check_power_status(codec, 0x01);
+       if (spec->gpio_led) {
+               if (spec->gpio_led <= 8) {
+                       stac_gpio_set(codec, spec->gpio_mask,
+                                       spec->gpio_dir, spec->gpio_data);
+               } else {
+                       stac_vrefout_set(codec,
+                                       spec->gpio_led, spec->vref_led);
+               }
+       }
        return 0;
 }
 
+static int stac92xx_post_suspend(struct hda_codec *codec)
+{
+       struct sigmatel_spec *spec = codec->spec;
+       if (spec->gpio_led > 8) {
+               /* with vref-out pin used for mute led control
+                * codec AFG is prevented from D3 state, but on
+                * system suspend it can (and should) be used
+                */
+               snd_hda_codec_read(codec, codec->afg, 0,
+                               AC_VERB_SET_POWER_STATE, AC_PWRST_D3);
+       }
+       return 0;
+}
+
+static void stac92xx_set_power_state(struct hda_codec *codec, hda_nid_t fg,
+                               unsigned int power_state)
+{
+       unsigned int afg_power_state = power_state;
+       struct sigmatel_spec *spec = codec->spec;
+
+       if (power_state == AC_PWRST_D3) {
+               if (spec->gpio_led > 8) {
+                       /* with vref-out pin used for mute led control
+                        * codec AFG is prevented from D3 state
+                        */
+                       afg_power_state = AC_PWRST_D1;
+               }
+               /* this delay seems necessary to avoid click noise at power-down */
+               msleep(100);
+       }
+       snd_hda_codec_read(codec, fg, 0, AC_VERB_SET_POWER_STATE,
+                       afg_power_state);
+       snd_hda_codec_set_power_to_all(codec, fg, power_state, true);
+}
+
 /*
- * using power check for controlling mute led of HP notebooks
- * check for mute state only on Speakers (nid = 0x10)
- *
- * For this feature CONFIG_SND_HDA_POWER_SAVE is needed, otherwise
- * the LED is NOT working properly !
- *
- * Changed name to reflect that it now works for any designated
- * model, not just HP HDX.
+ * For this feature CONFIG_SND_HDA_POWER_SAVE is needed
+ * as mute LED state is updated in check_power_status hook
  */
-
-#ifdef CONFIG_SND_HDA_POWER_SAVE
-static int stac92xx_hp_check_power_status(struct hda_codec *codec,
-                                             hda_nid_t nid)
+static int stac92xx_update_led_status(struct hda_codec *codec)
 {
        struct sigmatel_spec *spec = codec->spec;
-       int i, muted = 1;
+       int i, num_ext_dacs, muted = 1;
+       unsigned int muted_lvl, notmtd_lvl;
+       hda_nid_t nid;
+
+       if (!spec->gpio_led)
+               return 0;
 
        for (i = 0; i < spec->multiout.num_dacs; i++) {
                nid = spec->multiout.dac_nids[i];
@@ -4933,27 +5079,58 @@ static int stac92xx_hp_check_power_status(struct hda_codec *codec,
                        break;
                }
        }
-       if (muted)
-               spec->gpio_data &= ~spec->gpio_led; /* orange */
-       else
-               spec->gpio_data |= spec->gpio_led; /* white */
-
-       if (!spec->gpio_led_polarity) {
-               /* LED state is inverted on these systems */
-               spec->gpio_data ^= spec->gpio_led;
+       if (muted && spec->multiout.hp_nid)
+               if (!(snd_hda_codec_amp_read(codec,
+                               spec->multiout.hp_nid, 0, HDA_OUTPUT, 0) &
+                                       HDA_AMP_MUTE)) {
+                       muted = 0; /* HP is not muted */
+               }
+       num_ext_dacs = ARRAY_SIZE(spec->multiout.extra_out_nid);
+       for (i = 0; muted && i < num_ext_dacs; i++) {
+               nid = spec->multiout.extra_out_nid[i];
+               if (nid == 0)
+                       break;
+               if (!(snd_hda_codec_amp_read(codec, nid, 0, HDA_OUTPUT, 0) &
+                     HDA_AMP_MUTE)) {
+                       muted = 0; /* extra output is not muted */
+               }
        }
+       /* polarity defines *not* muted state level */
+       if (spec->gpio_led <= 8) {
+               if (muted)
+                       spec->gpio_data &= ~spec->gpio_led; /* orange */
+               else
+                       spec->gpio_data |= spec->gpio_led; /* white */
 
-       stac_gpio_set(codec, spec->gpio_mask, spec->gpio_dir, spec->gpio_data);
+               if (!spec->gpio_led_polarity) {
+                       /* LED state is inverted on these systems */
+                       spec->gpio_data ^= spec->gpio_led;
+               }
+               stac_gpio_set(codec, spec->gpio_mask,
+                               spec->gpio_dir, spec->gpio_data);
+       } else {
+               notmtd_lvl = spec->gpio_led_polarity ?
+                               AC_PINCTL_VREF_HIZ : AC_PINCTL_VREF_GRD;
+               muted_lvl = spec->gpio_led_polarity ?
+                               AC_PINCTL_VREF_GRD : AC_PINCTL_VREF_HIZ;
+               spec->vref_led = muted ? muted_lvl : notmtd_lvl;
+               stac_vrefout_set(codec, spec->gpio_led, spec->vref_led);
+       }
        return 0;
 }
-#endif
 
-static int stac92xx_suspend(struct hda_codec *codec, pm_message_t state)
+/*
+ * use power check for controlling mute led of HP notebooks
+ */
+static int stac92xx_check_power_status(struct hda_codec *codec,
+                                             hda_nid_t nid)
 {
-       stac92xx_shutup(codec);
+       stac92xx_update_led_status(codec);
+
        return 0;
 }
-#endif
+#endif /* CONFIG_SND_HDA_POWER_SAVE */
+#endif /* CONFIG_PM */
 
 static const struct hda_codec_ops stac92xx_patch_ops = {
        .build_controls = stac92xx_build_controls,
@@ -4961,7 +5138,7 @@ static const struct hda_codec_ops stac92xx_patch_ops = {
        .init = stac92xx_init,
        .free = stac92xx_free,
        .unsol_event = stac92xx_unsol_event,
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
        .suspend = stac92xx_suspend,
        .resume = stac92xx_resume,
 #endif
@@ -5477,12 +5654,19 @@ again:
 
 #ifdef CONFIG_SND_HDA_POWER_SAVE
        if (spec->gpio_led) {
-               spec->gpio_mask |= spec->gpio_led;
-               spec->gpio_dir |= spec->gpio_led;
-               spec->gpio_data |= spec->gpio_led;
-               /* register check_power_status callback. */
+               if (spec->gpio_led <= 8) {
+                       spec->gpio_mask |= spec->gpio_led;
+                       spec->gpio_dir |= spec->gpio_led;
+                       spec->gpio_data |= spec->gpio_led;
+               } else {
+                       codec->patch_ops.set_power_state =
+                                       stac92xx_set_power_state;
+                       codec->patch_ops.post_suspend =
+                                       stac92xx_post_suspend;
+               }
+               codec->patch_ops.pre_resume = stac92xx_pre_resume;
                codec->patch_ops.check_power_status =
-                       stac92xx_hp_check_power_status;
+                       stac92xx_check_power_status;
        }
 #endif 
 
@@ -5805,12 +5989,19 @@ again:
 
 #ifdef CONFIG_SND_HDA_POWER_SAVE
        if (spec->gpio_led) {
-               spec->gpio_mask |= spec->gpio_led;
-               spec->gpio_dir |= spec->gpio_led;
-               spec->gpio_data |= spec->gpio_led;
-               /* register check_power_status callback. */
+               if (spec->gpio_led <= 8) {
+                       spec->gpio_mask |= spec->gpio_led;
+                       spec->gpio_dir |= spec->gpio_led;
+                       spec->gpio_data |= spec->gpio_led;
+               } else {
+                       codec->patch_ops.set_power_state =
+                                       stac92xx_set_power_state;
+                       codec->patch_ops.post_suspend =
+                                       stac92xx_post_suspend;
+               }
+               codec->patch_ops.pre_resume = stac92xx_pre_resume;
                codec->patch_ops.check_power_status =
-                       stac92xx_hp_check_power_status;
+                       stac92xx_check_power_status;
        }
 #endif 
 
index f38160b..84d8798 100644 (file)
@@ -1708,7 +1708,7 @@ static void via_unsol_event(struct hda_codec *codec,
                via_gpio_control(codec);
 }
 
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
 static int via_suspend(struct hda_codec *codec, pm_message_t state)
 {
        struct via_spec *spec = codec->spec;
@@ -1736,7 +1736,7 @@ static const struct hda_codec_ops via_patch_ops = {
        .init = via_init,
        .free = via_free,
        .unsol_event = via_unsol_event,
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
        .suspend = via_suspend,
 #endif
 #ifdef CONFIG_SND_HDA_POWER_SAVE
index ff29380..76258f2 100644 (file)
@@ -907,6 +907,7 @@ static int ldo_regulator_register(struct snd_soc_codec *codec,
                                struct regulator_init_data *init_data,
                                int voltage)
 {
+       dev_err(codec->dev, "this setup needs regulator support in the kernel\n");
        return -EINVAL;
 }
 
@@ -1218,6 +1219,34 @@ static int sgtl5000_set_power_regs(struct snd_soc_codec *codec)
        return 0;
 }
 
+static int sgtl5000_replace_vddd_with_ldo(struct snd_soc_codec *codec)
+{
+       struct sgtl5000_priv *sgtl5000 = snd_soc_codec_get_drvdata(codec);
+       int ret;
+
+       /* set internal ldo to 1.2v */
+       ret = ldo_regulator_register(codec, &ldo_init_data, LDO_VOLTAGE);
+       if (ret) {
+               dev_err(codec->dev,
+                       "Failed to register vddd internal supplies: %d\n", ret);
+               return ret;
+       }
+
+       sgtl5000->supplies[VDDD].supply = LDO_CONSUMER_NAME;
+
+       ret = regulator_bulk_get(codec->dev, ARRAY_SIZE(sgtl5000->supplies),
+                       sgtl5000->supplies);
+
+       if (ret) {
+               ldo_regulator_remove(codec);
+               dev_err(codec->dev, "Failed to request supplies: %d\n", ret);
+               return ret;
+       }
+
+       dev_info(codec->dev, "Using internal LDO instead of VDDD\n");
+       return 0;
+}
+
 static int sgtl5000_enable_regulators(struct snd_soc_codec *codec)
 {
        u16 reg;
@@ -1235,30 +1264,9 @@ static int sgtl5000_enable_regulators(struct snd_soc_codec *codec)
        if (!ret)
                external_vddd = 1;
        else {
-               /* set internal ldo to 1.2v */
-               int voltage = LDO_VOLTAGE;
-
-               ret = ldo_regulator_register(codec, &ldo_init_data, voltage);
-               if (ret) {
-                       dev_err(codec->dev,
-                       "Failed to register vddd internal supplies: %d\n",
-                               ret);
-                       return ret;
-               }
-
-               sgtl5000->supplies[VDDD].supply = LDO_CONSUMER_NAME;
-
-               ret = regulator_bulk_get(codec->dev,
-                               ARRAY_SIZE(sgtl5000->supplies),
-                               sgtl5000->supplies);
-
-               if (ret) {
-                       ldo_regulator_remove(codec);
-                       dev_err(codec->dev,
-                               "Failed to request supplies: %d\n", ret);
-
+               ret = sgtl5000_replace_vddd_with_ldo(codec);
+               if (ret)
                        return ret;
-               }
        }
 
        ret = regulator_bulk_enable(ARRAY_SIZE(sgtl5000->supplies),
@@ -1287,7 +1295,6 @@ static int sgtl5000_enable_regulators(struct snd_soc_codec *codec)
         * roll back to use internal LDO
         */
        if (external_vddd && rev >= 0x11) {
-               int voltage = LDO_VOLTAGE;
                /* disable all regulator first */
                regulator_bulk_disable(ARRAY_SIZE(sgtl5000->supplies),
                                        sgtl5000->supplies);
@@ -1295,23 +1302,10 @@ static int sgtl5000_enable_regulators(struct snd_soc_codec *codec)
                regulator_bulk_free(ARRAY_SIZE(sgtl5000->supplies),
                                        sgtl5000->supplies);
 
-               ret = ldo_regulator_register(codec, &ldo_init_data, voltage);
+               ret = sgtl5000_replace_vddd_with_ldo(codec);
                if (ret)
                        return ret;
 
-               sgtl5000->supplies[VDDD].supply = LDO_CONSUMER_NAME;
-
-               ret = regulator_bulk_get(codec->dev,
-                               ARRAY_SIZE(sgtl5000->supplies),
-                               sgtl5000->supplies);
-               if (ret) {
-                       ldo_regulator_remove(codec);
-                       dev_err(codec->dev,
-                               "Failed to request supplies: %d\n", ret);
-
-                       return ret;
-               }
-
                ret = regulator_bulk_enable(ARRAY_SIZE(sgtl5000->supplies),
                                                sgtl5000->supplies);
                if (ret)
index 8499c56..60d740e 100644 (file)
@@ -3409,6 +3409,9 @@ static irqreturn_t wm8962_irq(int irq, void *data)
        active = snd_soc_read(codec, WM8962_INTERRUPT_STATUS_2);
        active &= ~mask;
 
+       /* Acknowledge the interrupts */
+       snd_soc_write(codec, WM8962_INTERRUPT_STATUS_2, active);
+
        if (active & WM8962_FLL_LOCK_EINT) {
                dev_dbg(codec->dev, "FLL locked\n");
                complete(&wm8962->fll_lock);
@@ -3433,9 +3436,6 @@ static irqreturn_t wm8962_irq(int irq, void *data)
                                      msecs_to_jiffies(250));
        }
 
-       /* Acknowledge the interrupts */
-       snd_soc_write(codec, WM8962_INTERRUPT_STATUS_2, active);
-
        return IRQ_HANDLED;
 }
 
index 9259f1f..1f11525 100644 (file)
@@ -62,9 +62,9 @@ static void davinci_vcif_start(struct snd_pcm_substream *substream)
        w = readl(davinci_vc->base + DAVINCI_VC_CTRL);
 
        if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
-               MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTDAC, 1);
+               MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTDAC, 0);
        else
-               MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTADC, 1);
+               MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTADC, 0);
 
        writel(w, davinci_vc->base + DAVINCI_VC_CTRL);
 }
@@ -80,9 +80,9 @@ static void davinci_vcif_stop(struct snd_pcm_substream *substream)
        /* Reset transmitter/receiver and sample rate/frame sync generators */
        w = readl(davinci_vc->base + DAVINCI_VC_CTRL);
        if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
-               MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTDAC, 0);
+               MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTDAC, 1);
        else
-               MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTADC, 0);
+               MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTADC, 1);
 
        writel(w, davinci_vc->base + DAVINCI_VC_CTRL);
 }
@@ -159,6 +159,7 @@ static int davinci_vcif_trigger(struct snd_pcm_substream *substream, int cmd,
        case SNDRV_PCM_TRIGGER_RESUME:
        case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
                davinci_vcif_start(substream);
+               break;
        case SNDRV_PCM_TRIGGER_STOP:
        case SNDRV_PCM_TRIGGER_SUSPEND:
        case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
index 1568eea..c086b78 100644 (file)
@@ -21,6 +21,7 @@
 #include <plat/audio.h>
 
 #include "dma.h"
+#include "idma.h"
 #include "i2s.h"
 #include "i2s-regs.h"
 
@@ -60,6 +61,7 @@ struct i2s_dai {
        /* DMA parameters */
        struct s3c_dma_params dma_playback;
        struct s3c_dma_params dma_capture;
+       struct s3c_dma_params idma_playback;
        u32     quirks;
        u32     suspend_i2smod;
        u32     suspend_i2scon;
@@ -877,6 +879,10 @@ static int samsung_i2s_dai_probe(struct snd_soc_dai *dai)
        if (i2s->quirks & QUIRK_NEED_RSTCLR)
                writel(CON_RSTCLR, i2s->addr + I2SCON);
 
+       if (i2s->quirks & QUIRK_SEC_DAI)
+               idma_reg_addr_init((void *)i2s->addr,
+                                       i2s->sec_dai->idma_playback.dma_addr);
+
 probe_exit:
        /* Reset any constraint on RFS and BFS */
        i2s->rfs = 0;
@@ -1077,6 +1083,7 @@ static __devinit int samsung_i2s_probe(struct platform_device *pdev)
                sec_dai->dma_playback.dma_size = 4;
                sec_dai->base = regs_base;
                sec_dai->quirks = quirks;
+               sec_dai->idma_playback.dma_addr = i2s_cfg->idma_addr;
                sec_dai->pri_dai = pri_dai;
                pri_dai->sec_dai = sec_dai;
        }
index e44267f..83ad8ca 100644 (file)
@@ -577,6 +577,7 @@ int snd_soc_suspend(struct device *dev)
                        case SND_SOC_BIAS_OFF:
                                codec->driver->suspend(codec, PMSG_SUSPEND);
                                codec->suspended = 1;
+                               codec->cache_sync = 1;
                                break;
                        default:
                                dev_dbg(codec->dev, "CODEC is on over suspend\n");
@@ -1140,7 +1141,7 @@ static int soc_probe_dai_link(struct snd_soc_card *card, int num, int order)
                        }
                }
                cpu_dai->probed = 1;
-               /* mark cpu_dai as probed and add to card cpu_dai list */
+               /* mark cpu_dai as probed and add to card dai list */
                list_add(&cpu_dai->card_list, &card->dai_dev_list);
        }
 
@@ -1171,7 +1172,7 @@ static int soc_probe_dai_link(struct snd_soc_card *card, int num, int order)
                        }
                }
 
-               /* mark cpu_dai as probed and add to card cpu_dai list */
+               /* mark codec_dai as probed and add to card dai list */
                codec_dai->probed = 1;
                list_add(&codec_dai->card_list, &card->dai_dev_list);
        }
index fbfcda0..7e15914 100644 (file)
@@ -124,6 +124,36 @@ static inline struct snd_soc_dapm_widget *dapm_cnew_widget(
        return kmemdup(_widget, sizeof(*_widget), GFP_KERNEL);
 }
 
+/* get snd_card from DAPM context */
+static inline struct snd_card *dapm_get_snd_card(
+       struct snd_soc_dapm_context *dapm)
+{
+       if (dapm->codec)
+               return dapm->codec->card->snd_card;
+       else if (dapm->platform)
+               return dapm->platform->card->snd_card;
+       else
+               BUG();
+
+       /* unreachable */
+       return NULL;
+}
+
+/* get soc_card from DAPM context */
+static inline struct snd_soc_card *dapm_get_soc_card(
+               struct snd_soc_dapm_context *dapm)
+{
+       if (dapm->codec)
+               return dapm->codec->card;
+       else if (dapm->platform)
+               return dapm->platform->card;
+       else
+               BUG();
+
+       /* unreachable */
+       return NULL;
+}
+
 static int soc_widget_read(struct snd_soc_dapm_widget *w, int reg)
 {
        if (w->codec)