xfs: sync eofblocks scans under iolock are livelock prone

author Brian Foster <bfoster@redhat.com>

Sat, 28 Jan 2017 07:22:56 +0000 (23:22 -0800)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sat, 8 Apr 2017 07:30:30 +0000 (09:30 +0200)
author Brian Foster <bfoster@redhat.com>
Sat, 28 Jan 2017 07:22:56 +0000 (23:22 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 8 Apr 2017 07:30:30 +0000 (09:30 +0200)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c

index 9a5d64b5f35a1d82b2112a7b07998d7063584405..81b7fee40b5468b8a50eb541675af8bcbc8eb9c3 100644 (file)
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -675,8 +675,10 @@ xfs_file_buffered_aio_write(
         struct xfs_inode        *ip = XFS_I(inode);
         ssize_t                 ret;
         int                     enospc = 0;
-       int                     iolock = XFS_IOLOCK_EXCL;
+       int                     iolock;
  
+write_retry:
+       iolock = XFS_IOLOCK_EXCL;
         xfs_rw_ilock(ip, iolock);
  
         ret = xfs_file_aio_write_checks(iocb, from, &iolock);
@@ -686,7 +688,6 @@ xfs_file_buffered_aio_write(
         /* We can write back this queue in page reclaim */
         current->backing_dev_info = inode_to_bdi(inode);
  
-write_retry:
         trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos);
         ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
         if (likely(ret >= 0))
@@ -702,18 +703,21 @@ write_retry:
          * running at the same time.
          */
         if (ret == -EDQUOT && !enospc) {
+               xfs_rw_iunlock(ip, iolock);
                 enospc = xfs_inode_free_quota_eofblocks(ip);
                 if (enospc)
                         goto write_retry;
                 enospc = xfs_inode_free_quota_cowblocks(ip);
                 if (enospc)
                         goto write_retry;
+               iolock = 0;
         } else if (ret == -ENOSPC && !enospc) {
                 struct xfs_eofblocks eofb = {0};
  
                 enospc = 1;
                 xfs_flush_inodes(ip->i_mount);
-               eofb.eof_scan_owner = ip->i_ino; /* for locking */
+
+               xfs_rw_iunlock(ip, iolock);
                 eofb.eof_flags = XFS_EOF_FLAGS_SYNC;
                 xfs_icache_free_eofblocks(ip->i_mount, &eofb);
                 goto write_retry;
@@ -721,7 +725,8 @@ write_retry:
  
         current->backing_dev_info = NULL;
  out:
-       xfs_rw_iunlock(ip, iolock);
+       if (iolock)
+               xfs_rw_iunlock(ip, iolock);
         return ret;
  }
  
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c

index e4b382aabf9f34215111bb199bf81b0a65bc1bf6..78708d001a635c415bcb2389846e2289c303b3ac 100644 (file)
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1326,11 +1326,8 @@ xfs_inode_free_eofblocks(
  {
         int ret = 0;
         struct xfs_eofblocks *eofb = args;
-       bool need_iolock = true;
         int match;
  
-       ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
-
         if (!xfs_can_free_eofblocks(ip, false)) {
                 /* inode could be preallocated or append-only */
                 trace_xfs_inode_free_eofblocks_invalid(ip);
@@ -1358,27 +1355,19 @@ xfs_inode_free_eofblocks(
                 if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
                     XFS_ISIZE(ip) < eofb->eof_min_file_size)
                         return 0;
-
-               /*
-                * A scan owner implies we already hold the iolock. Skip it here
-                * to avoid deadlock.
-                */
-               if (eofb->eof_scan_owner == ip->i_ino)
-                       need_iolock = false;
         }
  
         /*
          * If the caller is waiting, return -EAGAIN to keep the background
          * scanner moving and revisit the inode in a subsequent pass.
          */
-       if (need_iolock && !xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
+       if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
                 if (flags & SYNC_WAIT)
                         ret = -EAGAIN;
                 return ret;
         }
         ret = xfs_free_eofblocks(ip);
-       if (need_iolock)
-               xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
  
         return ret;
  }
@@ -1425,15 +1414,10 @@ __xfs_inode_free_quota_eofblocks(
         struct xfs_eofblocks eofb = {0};
         struct xfs_dquot *dq;
  
-       ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
-
         /*
-        * Set the scan owner to avoid a potential livelock. Otherwise, the scan
-        * can repeatedly trylock on the inode we're currently processing. We
-        * run a sync scan to increase effectiveness and use the union filter to
+        * Run a sync scan to increase effectiveness and use the union filter to
          * cover all applicable quotas in a single scan.
          */
-       eofb.eof_scan_owner = ip->i_ino;
         eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC;
  
         if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) {
@@ -1585,12 +1569,9 @@ xfs_inode_free_cowblocks(
  {
         int ret;
         struct xfs_eofblocks *eofb = args;
-       bool need_iolock = true;
         int match;
         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
  
-       ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
-
         /*
          * Just clear the tag if we have an empty cow fork or none at all. It's
          * possible the inode was fully unshared since it was originally tagged.
@@ -1623,28 +1604,16 @@ xfs_inode_free_cowblocks(
                 if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
                     XFS_ISIZE(ip) < eofb->eof_min_file_size)
                         return 0;
-
-               /*
-                * A scan owner implies we already hold the iolock. Skip it in
-                * xfs_free_eofblocks() to avoid deadlock. This also eliminates
-                * the possibility of EAGAIN being returned.
-                */
-               if (eofb->eof_scan_owner == ip->i_ino)
-                       need_iolock = false;
         }
  
         /* Free the CoW blocks */
-       if (need_iolock) {
-               xfs_ilock(ip, XFS_IOLOCK_EXCL);
-               xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
-       }
+       xfs_ilock(ip, XFS_IOLOCK_EXCL);
+       xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
  
         ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF);
  
-       if (need_iolock) {
-               xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
-               xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-       }
+       xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
+       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
  
         return ret;
  }
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h

index a1e02f4708abbfd6c942f4a5178709c58d681d8e..8a7c849b4deade24831871a28d76c9af5c7f3629 100644 (file)
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -27,7 +27,6 @@ struct xfs_eofblocks {
         kgid_t          eof_gid;
         prid_t          eof_prid;
         __u64           eof_min_file_size;
-       xfs_ino_t       eof_scan_owner;
  };
  
  #define SYNC_WAIT              0x0001  /* wait for i/o to complete */
@@ -102,7 +101,6 @@ xfs_fs_eofblocks_from_user(
         dst->eof_flags = src->eof_flags;
         dst->eof_prid = src->eof_prid;
         dst->eof_min_file_size = src->eof_min_file_size;
-       dst->eof_scan_owner = NULLFSINO;
  
         dst->eof_uid = INVALID_UID;
         if (src->eof_flags & XFS_EOF_FLAGS_UID) {
author	Brian Foster <bfoster@redhat.com>
	Sat, 28 Jan 2017 07:22:56 +0000 (23:22 -0800)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sat, 8 Apr 2017 07:30:30 +0000 (09:30 +0200)
fs/xfs/xfs_file.c		patch \| blob \| history
fs/xfs/xfs_icache.c		patch \| blob \| history
fs/xfs/xfs_icache.h		patch \| blob \| history