NFSv4.1: Fix a race in nfs4_write_inode
authorTrond Myklebust <trond.myklebust@primarydata.com>
Mon, 13 Jan 2014 18:34:36 +0000 (13:34 -0500)
committerTrond Myklebust <trond.myklebust@primarydata.com>
Mon, 13 Jan 2014 18:34:36 +0000 (13:34 -0500)
nfs4_write_inode() must not be allowed to exit until the layoutcommit
is done. That means that both NFS_INO_LAYOUTCOMMIT and
NFS_INO_LAYOUTCOMMITTING have to be cleared.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
fs/nfs/nfs4super.c
fs/nfs/pnfs.c

index 65ab0a0..808f295 100644 (file)
@@ -77,17 +77,9 @@ static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
        int ret = nfs_write_inode(inode, wbc);
 
-       if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) {
-               int status;
-               bool sync = true;
-
-               if (wbc->sync_mode == WB_SYNC_NONE)
-                       sync = false;
-
-               status = pnfs_layoutcommit_inode(inode, sync);
-               if (status < 0)
-                       return status;
-       }
+       if (ret == 0)
+               ret = pnfs_layoutcommit_inode(inode,
+                               wbc->sync_mode == WB_SYNC_ALL);
        return ret;
 }
 
index d75d938..4755858 100644 (file)
@@ -1790,6 +1790,15 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
 
+static void pnfs_clear_layoutcommitting(struct inode *inode)
+{
+       unsigned long *bitlock = &NFS_I(inode)->flags;
+
+       clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
+       smp_mb__after_clear_bit();
+       wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
+}
+
 /*
  * There can be multiple RW segments.
  */
@@ -1807,7 +1816,6 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
 static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
 {
        struct pnfs_layout_segment *lseg, *tmp;
-       unsigned long *bitlock = &NFS_I(inode)->flags;
 
        /* Matched by references in pnfs_set_layoutcommit */
        list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
@@ -1815,9 +1823,7 @@ static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *lis
                pnfs_put_lseg(lseg);
        }
 
-       clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
-       smp_mb__after_clear_bit();
-       wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
+       pnfs_clear_layoutcommitting(inode);
 }
 
 void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
@@ -1881,43 +1887,37 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
        struct nfs4_layoutcommit_data *data;
        struct nfs_inode *nfsi = NFS_I(inode);
        loff_t end_pos;
-       int status = 0;
+       int status;
 
-       dprintk("--> %s inode %lu\n", __func__, inode->i_ino);
-
-       if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
+       if (!pnfs_layoutcommit_outstanding(inode))
                return 0;
 
-       /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
-       data = kzalloc(sizeof(*data), GFP_NOFS);
-       if (!data) {
-               status = -ENOMEM;
-               goto out;
-       }
-
-       if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
-               goto out_free;
+       dprintk("--> %s inode %lu\n", __func__, inode->i_ino);
 
+       status = -EAGAIN;
        if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
-               if (!sync) {
-                       status = -EAGAIN;
-                       goto out_free;
-               }
-               status = wait_on_bit_lock(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING,
-                                       nfs_wait_bit_killable, TASK_KILLABLE);
+               if (!sync)
+                       goto out;
+               status = wait_on_bit_lock(&nfsi->flags,
+                               NFS_INO_LAYOUTCOMMITTING,
+                               nfs_wait_bit_killable,
+                               TASK_KILLABLE);
                if (status)
-                       goto out_free;
+                       goto out;
        }
 
-       INIT_LIST_HEAD(&data->lseg_list);
+       status = -ENOMEM;
+       /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
+       data = kzalloc(sizeof(*data), GFP_NOFS);
+       if (!data)
+               goto clear_layoutcommitting;
+
+       status = 0;
        spin_lock(&inode->i_lock);
-       if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
-               clear_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags);
-               spin_unlock(&inode->i_lock);
-               wake_up_bit(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING);
-               goto out_free;
-       }
+       if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
+               goto out_unlock;
 
+       INIT_LIST_HEAD(&data->lseg_list);
        pnfs_list_write_lseg(inode, &data->lseg_list);
 
        end_pos = nfsi->layout->plh_lwb;
@@ -1940,8 +1940,11 @@ out:
                mark_inode_dirty_sync(inode);
        dprintk("<-- %s status %d\n", __func__, status);
        return status;
-out_free:
+out_unlock:
+       spin_unlock(&inode->i_lock);
        kfree(data);
+clear_layoutcommitting:
+       pnfs_clear_layoutcommitting(inode);
        goto out;
 }