- Glock internal locking rules
- ------------------------------
+.. SPDX-License-Identifier: GPL-2.0
+
+============================
+Glock internal locking rules
+============================
This documents the basic principles of the glock state machine
internals. Each glock (struct gfs2_glock in fs/gfs2/incore.h)
namely shared (SH), deferred (DF) and exclusive (EX). Those translate
to the following DLM lock modes:
-Glock mode | DLM lock mode
-------------------------------
- UN | IV/NL Unlocked (no DLM lock associated with glock) or NL
- SH | PR (Protected read)
- DF | CW (Concurrent write)
- EX | EX (Exclusive)
+========== ====== =====================================================
+Glock mode DLM    lock mode
+========== ====== =====================================================
+UN         IV/NL  Unlocked (no DLM lock associated with glock) or NL
+SH         PR     (Protected read)
+DF         CW     (Concurrent write)
+EX         EX     (Exclusive)
+========== ====== =====================================================
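+
+For illustration, the lock module translates glock states into DLM lock
+modes with a simple switch; a sketch along the lines of make_mode() in
+fs/gfs2/lock_dlm.c (not the exact kernel code)::
+
+    static int make_mode(const unsigned int lmstate)
+    {
+            switch (lmstate) {
+            case LM_ST_UNLOCKED:
+                    return DLM_LOCK_NL;
+            case LM_ST_EXCLUSIVE:
+                    return DLM_LOCK_EX;
+            case LM_ST_DEFERRED:
+                    return DLM_LOCK_CW;
+            case LM_ST_SHARED:
+                    return DLM_LOCK_PR;
+            }
+            return -1; /* invalid state */
+    }
+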
Thus DF is basically a shared mode which is incompatible with the "normal"
shared lock mode, SH. In GFS2 the DF mode is used exclusively for direct I/O
operations. The glocks are basically a lock plus some routines which deal
with cache management. The following rules apply for the cache:
-Glock mode | Cache data | Cache Metadata | Dirty Data | Dirty Metadata
---------------------------------------------------------------------------
- UN | No | No | No | No
- SH | Yes | Yes | No | No
- DF | No | Yes | No | No
- EX | Yes | Yes | Yes | Yes
+========== ========== ============== ========== ==============
+Glock mode Cache data Cache Metadata Dirty Data Dirty Metadata
+========== ========== ============== ========== ==============
+UN         No         No             No         No
+SH         Yes        Yes            No         No
+DF         No         Yes            No         No
+EX         Yes        Yes            Yes        Yes
+========== ========== ============== ========== ==============
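+
+Expressed as predicates, the cache rules come down to the following sketch
+(these helpers are hypothetical, for illustration only)::
+
+    static bool may_cache_data(unsigned int state)
+    {
+            /* Only SH and EX allow caching of data; DF does not. */
+            return state == LM_ST_SHARED || state == LM_ST_EXCLUSIVE;
+    }
+
+    static bool may_cache_metadata(unsigned int state)
+    {
+            /* SH, DF and EX all allow caching of metadata. */
+            return state != LM_ST_UNLOCKED;
+    }
+
+    static bool may_dirty(unsigned int state)
+    {
+            /* Dirty data and metadata may only exist under EX. */
+            return state == LM_ST_EXCLUSIVE;
+    }
+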
These rules are implemented using the various glock operations which
are defined for each type of glock. Not all types of glocks use
Table of glock operations and per type constants:
-Field | Purpose
-----------------------------------------------------------------------------
-go_xmote_th | Called before remote state change (e.g. to sync dirty data)
-go_xmote_bh | Called after remote state change (e.g. to refill cache)
-go_inval | Called if remote state change requires invalidating the cache
-go_demote_ok | Returns boolean value of whether its ok to demote a glock
- | (e.g. checks timeout, and that there is no cached data)
-go_lock | Called for the first local holder of a lock
-go_unlock | Called on the final local unlock of a lock
-go_dump | Called to print content of object for debugfs file, or on
- | error to dump glock to the log.
-go_type | The type of the glock, LM_TYPE_.....
-go_callback | Called if the DLM sends a callback to drop this lock
-go_flags | GLOF_ASPACE is set, if the glock has an address space
- | associated with it
+============= =============================================================
+Field         Purpose
+============= =============================================================
+go_xmote_th   Called before remote state change (e.g. to sync dirty data)
+go_xmote_bh   Called after remote state change (e.g. to refill cache)
+go_inval      Called if remote state change requires invalidating the cache
+go_demote_ok  Returns boolean value of whether it's ok to demote a glock
+              (e.g. checks timeout, and that there is no cached data)
+go_lock       Called for the first local holder of a lock
+go_unlock     Called on the final local unlock of a lock
+go_dump       Called to print content of object for debugfs file, or on
+              error to dump glock to the log.
+go_type       The type of the glock, ``LM_TYPE_*``
+go_callback   Called if the DLM sends a callback to drop this lock
+go_flags      GLOF_ASPACE is set if the glock has an address space
+              associated with it
+============= =============================================================
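+
+For example, the iopen glock type supplies only a subset of these
+operations; see the fs/gfs2/glops.c hunk later in this patch::
+
+    const struct gfs2_glock_operations gfs2_iopen_glops = {
+            .go_type = LM_TYPE_IOPEN,
+            .go_callback = iopen_go_callback,
+            .go_demote_ok = iopen_go_demote_ok,
+            .go_flags = GLOF_LRU | GLOF_NONDISK,
+    };
+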
The minimum hold time for each lock is the time after a remote lock
grant for which we ignore remote demote requests. This is in order to
Locking rules for glock operations:
-Operation | GLF_LOCK bit lock held | gl_lockref.lock spinlock held
--------------------------------------------------------------------------
-go_xmote_th | Yes | No
-go_xmote_bh | Yes | No
-go_inval | Yes | No
-go_demote_ok | Sometimes | Yes
-go_lock | Yes | No
-go_unlock | Yes | No
-go_dump | Sometimes | Yes
-go_callback | Sometimes (N/A) | Yes
-
-N.B. Operations must not drop either the bit lock or the spinlock
-if its held on entry. go_dump and do_demote_ok must never block.
-Note that go_dump will only be called if the glock's state
-indicates that it is caching uptodate data.
+============= ====================== =============================
+Operation     GLF_LOCK bit lock held gl_lockref.lock spinlock held
+============= ====================== =============================
+go_xmote_th   Yes                    No
+go_xmote_bh   Yes                    No
+go_inval      Yes                    No
+go_demote_ok  Sometimes              Yes
+go_lock       Yes                    No
+go_unlock     Yes                    No
+go_dump       Sometimes              Yes
+go_callback   Sometimes (N/A)        Yes
+============= ====================== =============================
+
+.. Note::
+
+   Operations must not drop either the bit lock or the spinlock
+   if it is held on entry. go_dump and go_demote_ok must never block.
+   go_dump will only be called if the glock's state
+   indicates that it is caching uptodate data.
Glock locking order within GFS2:
2. Rename glock (for rename only)
3. Inode glock(s)
(Parents before children, inodes at "same level" with same parent in
- lock number order)
+ lock number order)
4. Rgrp glock(s) (for (de)allocation operations)
5. Transaction glock (via gfs2_trans_begin) for non-read operations
6. i_rw_mutex (if required)
is on a per-inode basis. Locking of rgrps is on a per rgrp basis.
In general we prefer to lock local locks prior to cluster locks.
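+
+A hypothetical allocation path would therefore take its locks roughly as
+follows (a sketch only, with error handling and the real callers' details
+omitted; the numbers refer to the ordering list above)::
+
+    struct gfs2_holder i_gh, r_gh;
+
+    /* 3. Inode glock */
+    gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
+    /* 4. Rgrp glock for the allocation */
+    gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &r_gh);
+    /* 5. Transaction glock, taken via gfs2_trans_begin() */
+    gfs2_trans_begin(sdp, blocks, revokes);
+
+    /* ... allocate ... */
+
+    gfs2_trans_end(sdp);
+    gfs2_glock_dq_uninit(&r_gh);
+    gfs2_glock_dq_uninit(&i_gh);
+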
- Glock Statistics
- ------------------
+Glock Statistics
+----------------
The stats are divided into two sets: those relating to the
super block and those relating to an individual glock. The
1. To be able to better set the glock "min hold time"
2. To spot performance issues more easily
3. To improve the algorithm for selecting resource groups for
-allocation (to base it on lock wait time, rather than blindly
-using a "try lock")
+ allocation (to base it on lock wait time, rather than blindly
+ using a "try lock")
Due to the smoothing action of the updates, a step change in
some input quantity being sampled will only fully be taken
measuring system, but I hope this is as accurate as we
can reasonably make it.
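+
+Each mean/variance pair is a smoothed exponential estimate, updated in the
+style of TCP round trip time estimation; a sketch modeled on
+gfs2_update_stats() in fs/gfs2/glock.c::
+
+    static inline void gfs2_update_stats(struct gfs2_lkstats *s,
+                                         unsigned index, s64 sample)
+    {
+            s64 delta = sample - s->stats[index];
+
+            s->stats[index] += (delta >> 3);        /* mean += delta / 8 */
+            index++;
+            s->stats[index] += ((s64)(abs(delta) - s->stats[index])) >> 2;
+    }
+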
-Per sb stats can be found here:
-/sys/kernel/debug/gfs2/<fsname>/sbstats
-Per glock stats can be found here:
-/sys/kernel/debug/gfs2/<fsname>/glstats
+Per sb stats can be found here::
+
+ /sys/kernel/debug/gfs2/<fsname>/sbstats
+
+Per glock stats can be found here::
+
+ /sys/kernel/debug/gfs2/<fsname>/glstats
Assuming that debugfs is mounted on /sys/kernel/debug and also
that <fsname> is replaced with the name of the gfs2 filesystem
The abbreviations used in the output are as follows:
-srtt - Smoothed round trip time for non-blocking dlm requests
-srttvar - Variance estimate for srtt
-srttb - Smoothed round trip time for (potentially) blocking dlm requests
-srttvarb - Variance estimate for srttb
-sirt - Smoothed inter-request time (for dlm requests)
-sirtvar - Variance estimate for sirt
-dlm - Number of dlm requests made (dcnt in glstats file)
-queue - Number of glock requests queued (qcnt in glstats file)
+========= ================================================================
+srtt      Smoothed round trip time for non-blocking dlm requests
+srttvar   Variance estimate for srtt
+srttb     Smoothed round trip time for (potentially) blocking dlm requests
+srttvarb  Variance estimate for srttb
+sirt      Smoothed inter-request time (for dlm requests)
+sirtvar   Variance estimate for sirt
+dlm       Number of dlm requests made (dcnt in glstats file)
+queue     Number of glock requests queued (qcnt in glstats file)
+========= ================================================================
The sbstats file contains a set of these stats for each glock type (so 8 lines
for each type) and for each cpu (one column per cpu). The glstats file contains
for the glock in question, along with some additional information on each dlm
reply that is received:
-status - The status of the dlm request
-flags - The dlm request flags
-tdiff - The time taken by this specific request
+====== =======================================
+status The status of the dlm request
+flags  The dlm request flags
+tdiff  The time taken by this specific request
+====== =======================================
+
(remaining fields as per above list)
f2fs
gfs2
+   gfs2-glocks
   gfs2-uevents
hfs
hfsplus
hpfs
S: Supported
W: http://sources.redhat.com/cluster/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2.git
-F: Documentation/filesystems/gfs2*.txt
+F: Documentation/filesystems/gfs2*
F: fs/gfs2/
F: include/uapi/linux/gfs2_ondisk.h
struct gfs2_sbd *sdp = sb->s_fs_info;
struct inode *inode;
- inode = gfs2_lookup_by_inum(sdp, inum->no_addr, &inum->no_formal_ino,
+ if (!inum->no_formal_ino)
+ return ERR_PTR(-ESTALE);
+ inode = gfs2_lookup_by_inum(sdp, inum->no_addr, inum->no_formal_ino,
GFS2_BLKST_DINODE);
if (IS_ERR(inode))
return ERR_CAST(inode);
{
struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
- if (gl->gl_ops->go_flags & GLOF_ASPACE) {
+ kfree(gl->gl_lksb.sb_lvbptr);
+ if (gl->gl_ops->go_flags & GLOF_ASPACE)
kmem_cache_free(gfs2_glock_aspace_cachep, gl);
- } else {
- kfree(gl->gl_lksb.sb_lvbptr);
+ else
kmem_cache_free(gfs2_glock_cachep, gl);
- }
}
/**
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- BUG_ON(atomic_read(&gl->gl_revokes));
+ gfs2_glock_assert_withdraw(gl, atomic_read(&gl->gl_revokes) == 0);
rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
smp_mb();
wake_up_glock(gl);
gl->gl_tchange = jiffies;
}
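+
+/*
+ * gfs2_set_demote - set the GLF_DEMOTE flag and wake up any waiters on
+ * sd_async_glock_wait so that they can re-check the glock state.
+ */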
+static void gfs2_set_demote(struct gfs2_glock *gl)
+{
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+
+ set_bit(GLF_DEMOTE, &gl->gl_flags);
+ smp_mb();
+ wake_up(&sdp->sd_async_glock_wait);
+}
+
static void gfs2_demote_wake(struct gfs2_glock *gl)
{
gl->gl_demote_state = LM_ST_EXCLUSIVE;
*/
if ((atomic_read(&gl->gl_ail_count) != 0) &&
(!cmpxchg(&sdp->sd_log_error, 0, -EIO))) {
- gfs2_assert_warn(sdp, !atomic_read(&gl->gl_ail_count));
+ gfs2_glock_assert_warn(gl,
+ !atomic_read(&gl->gl_ail_count));
gfs2_dump_glock(NULL, gl, true);
}
glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA);
return;
}
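+
+/*
+ * gfs2_inode_remember_delete - record in the glock's lock value block (LVB)
+ * the generation number of an inode that has been deleted, so that other
+ * nodes can tell that the delete has already happened.
+ */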
+void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation)
+{
+ struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr;
+
+ if (ri->ri_magic == 0)
+ ri->ri_magic = cpu_to_be32(GFS2_MAGIC);
+ if (ri->ri_magic == cpu_to_be32(GFS2_MAGIC))
+ ri->ri_generation_deleted = cpu_to_be64(generation);
+}
+
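+/*
+ * gfs2_inode_already_deleted - check the LVB to see whether an inode with
+ * the given generation number has already been deleted on another node.
+ */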
+bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation)
+{
+ struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr;
+
+ if (ri->ri_magic != cpu_to_be32(GFS2_MAGIC))
+ return false;
+ return generation <= be64_to_cpu(ri->ri_generation_deleted);
+}
+
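+/*
+ * gfs2_glock_poke - briefly acquire and release @gl in SH without caching
+ * anything (GL_SKIP), signalling to remote holders that we are still
+ * interested in the object it protects.
+ */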
+static void gfs2_glock_poke(struct gfs2_glock *gl)
+{
+ int flags = LM_FLAG_TRY_1CB | LM_FLAG_ANY | GL_SKIP;
+ struct gfs2_holder gh;
+ int error;
+
+ error = gfs2_glock_nq_init(gl, LM_ST_SHARED, flags, &gh);
+ if (!error)
+ gfs2_glock_dq(&gh);
+}
+
+static bool gfs2_try_evict(struct gfs2_glock *gl)
+{
+ struct gfs2_inode *ip;
+ bool evicted = false;
+
+ /*
+ * If there is contention on the iopen glock and we have an inode, try
+ * to grab and release the inode so that it can be evicted. This will
+ * allow the remote node to go ahead and delete the inode without us
+ * having to do it, which will avoid rgrp glock thrashing.
+ *
+ * The remote node is likely still holding the corresponding inode
+ * glock, so it will run before we get to verify that the delete has
+ * happened below.
+ */
+ spin_lock(&gl->gl_lockref.lock);
+ ip = gl->gl_object;
+ if (ip && !igrab(&ip->i_inode))
+ ip = NULL;
+ spin_unlock(&gl->gl_lockref.lock);
+ if (ip) {
+ struct gfs2_glock *inode_gl = NULL;
+
+ gl->gl_no_formal_ino = ip->i_no_formal_ino;
+ set_bit(GIF_DEFERRED_DELETE, &ip->i_flags);
+ d_prune_aliases(&ip->i_inode);
+ iput(&ip->i_inode);
+
+ /* If the inode was evicted, gl->gl_object will now be NULL. */
+ spin_lock(&gl->gl_lockref.lock);
+ ip = gl->gl_object;
+ if (ip) {
+ inode_gl = ip->i_gl;
+ lockref_get(&inode_gl->gl_lockref);
+ clear_bit(GIF_DEFERRED_DELETE, &ip->i_flags);
+ }
+ spin_unlock(&gl->gl_lockref.lock);
+ if (inode_gl) {
+ gfs2_glock_poke(inode_gl);
+ gfs2_glock_put(inode_gl);
+ }
+ evicted = !ip;
+ }
+ return evicted;
+}
+
static void delete_work_func(struct work_struct *work)
{
- struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete);
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct gfs2_glock *gl = container_of(dwork, struct gfs2_glock, gl_delete);
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct inode *inode;
u64 no_addr = gl->gl_name.ln_number;
+ spin_lock(&gl->gl_lockref.lock);
+ clear_bit(GLF_PENDING_DELETE, &gl->gl_flags);
+ spin_unlock(&gl->gl_lockref.lock);
+
/* If someone's using this glock to create a new dinode, the block must
have been freed by another node, then re-used, in which case our
iopen callback is too late after the fact. Ignore it. */
if (test_bit(GLF_INODE_CREATING, &gl->gl_flags))
goto out;
- inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
+ if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
+ /*
+ * If we can evict the inode, give the remote node trying to
+ * delete the inode some time before verifying that the delete
+ * has happened. Otherwise, if we cause contention on the inode glock
+ * immediately, the remote node will think that we still have
+ * the inode in use, and so it will give up waiting.
+ *
+ * If we can't evict the inode, signal to the remote node that
+ * the inode is still in use. We'll later try to delete the
+ * inode locally in gfs2_evict_inode.
+ *
+ * FIXME: We only need to verify that the remote node has
+ * deleted the inode because nodes before this remote delete
+ * rework won't cooperate. At a later time, when we no longer
+ * care about compatibility with such nodes, we can skip this
+ * step entirely.
+ */
+ if (gfs2_try_evict(gl)) {
+ if (gfs2_queue_delete_work(gl, 5 * HZ))
+ return;
+ }
+ goto out;
+ }
+
+ inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino,
+ GFS2_BLKST_UNLINKED);
if (!IS_ERR_OR_NULL(inode)) {
d_prune_aliases(inode);
iput(inode);
if (!delay) {
clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);
- set_bit(GLF_DEMOTE, &gl->gl_flags);
+ gfs2_set_demote(gl);
}
}
run_queue(gl, 0);
gl->gl_object = NULL;
gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
- INIT_WORK(&gl->gl_delete, delete_work_func);
+ INIT_DELAYED_WORK(&gl->gl_delete, delete_work_func);
mapping = gfs2_glock2aspace(gl);
if (mapping) {
static void handle_callback(struct gfs2_glock *gl, unsigned int state,
unsigned long delay, bool remote)
{
- int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE;
-
- set_bit(bit, &gl->gl_flags);
+ if (delay)
+ set_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);
+ else
+ gfs2_set_demote(gl);
if (gl->gl_demote_state == LM_ST_EXCLUSIVE) {
gl->gl_demote_state = state;
gl->gl_demote_time = jiffies;
rhashtable_walk_exit(&iter);
}
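+
+/*
+ * gfs2_queue_delete_work - queue delayed delete work for @gl and set
+ * GLF_PENDING_DELETE; returns true if the work was newly queued.
+ */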
+bool gfs2_queue_delete_work(struct gfs2_glock *gl, unsigned long delay)
+{
+ bool queued;
+
+ spin_lock(&gl->gl_lockref.lock);
+ queued = queue_delayed_work(gfs2_delete_workqueue,
+ &gl->gl_delete, delay);
+ if (queued)
+ set_bit(GLF_PENDING_DELETE, &gl->gl_flags);
+ spin_unlock(&gl->gl_lockref.lock);
+ return queued;
+}
+
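+/*
+ * gfs2_cancel_delete_work - cancel pending delete work and drop the glock
+ * reference that the queued work was holding.
+ */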
+void gfs2_cancel_delete_work(struct gfs2_glock *gl)
+{
+ if (cancel_delayed_work_sync(&gl->gl_delete)) {
+ clear_bit(GLF_PENDING_DELETE, &gl->gl_flags);
+ gfs2_glock_put(gl);
+ }
+}
+
+bool gfs2_delete_work_queued(const struct gfs2_glock *gl)
+{
+ return test_bit(GLF_PENDING_DELETE, &gl->gl_flags);
+}
+
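+/*
+ * flush_delete_work - wait for a glock's delete work to finish, then queue
+ * normal glock work to process any resulting state change.
+ */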
+static void flush_delete_work(struct gfs2_glock *gl)
+{
+ flush_delayed_work(&gl->gl_delete);
+ gfs2_glock_queue_work(gl, 0);
+}
+
+void gfs2_flush_delete_work(struct gfs2_sbd *sdp)
+{
+ glock_hash_walk(flush_delete_work, sdp);
+ flush_workqueue(gfs2_delete_workqueue);
+}
+
/**
* thaw_glock - thaw out a glock which has an unprocessed reply waiting
* @gl: The glock to thaw
int ret;
ret = gfs2_truncatei_resume(ip);
- gfs2_assert_withdraw(gl->gl_name.ln_sbd, ret == 0);
+ gfs2_glock_assert_withdraw(gl, ret == 0);
spin_lock(&gl->gl_lockref.lock);
clear_bit(GLF_LOCK, &gl->gl_flags);
char gflags_buf[32];
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
+ unsigned long nrpages = 0;
+
+ if (gl->gl_ops->go_flags & GLOF_ASPACE) {
+ struct address_space *mapping = gfs2_glock2aspace(gl);
+ nrpages = mapping->nrpages;
+ }
memset(fs_id_buf, 0, sizeof(fs_id_buf));
if (fsid && sdp) /* safety precaution */
sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
dtime = 0;
gfs2_print_dbg(seq, "%sG: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d "
- "v:%d r:%d m:%ld\n", fs_id_buf, state2str(gl->gl_state),
- gl->gl_name.ln_type,
- (unsigned long long)gl->gl_name.ln_number,
- gflags2str(gflags_buf, gl),
- state2str(gl->gl_target),
- state2str(gl->gl_demote_state), dtime,
- atomic_read(&gl->gl_ail_count),
- atomic_read(&gl->gl_revokes),
- (int)gl->gl_lockref.count, gl->gl_hold_time);
+ "v:%d r:%d m:%ld p:%lu\n",
+ fs_id_buf, state2str(gl->gl_state),
+ gl->gl_name.ln_type,
+ (unsigned long long)gl->gl_name.ln_number,
+ gflags2str(gflags_buf, gl),
+ state2str(gl->gl_target),
+ state2str(gl->gl_demote_state), dtime,
+ atomic_read(&gl->gl_ail_count),
+ atomic_read(&gl->gl_revokes),
+ (int)gl->gl_lockref.count, gl->gl_hold_time, nrpages);
list_for_each_entry(gh, &gl->gl_holders, gh_list)
dump_holder(seq, gh, fs_id_buf);
#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { \
gfs2_dump_glock(NULL, gl, true); \
BUG(); } } while(0)
+#define gfs2_glock_assert_warn(gl, x) do { if (unlikely(!(x))) { \
+ gfs2_dump_glock(NULL, gl, true); \
+ gfs2_assert_warn((gl)->gl_name.ln_sbd, (x)); } } \
+ while (0)
+#define gfs2_glock_assert_withdraw(gl, x) do { if (unlikely(!(x))) { \
+ gfs2_dump_glock(NULL, gl, true); \
+ gfs2_assert_withdraw((gl)->gl_name.ln_sbd, (x)); } } \
+ while (0)
+
extern __printf(2, 3)
void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
+extern bool gfs2_queue_delete_work(struct gfs2_glock *gl, unsigned long delay);
+extern void gfs2_cancel_delete_work(struct gfs2_glock *gl);
+extern bool gfs2_delete_work_queued(const struct gfs2_glock *gl);
+extern void gfs2_flush_delete_work(struct gfs2_sbd *sdp);
extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
extern void gfs2_glock_thaw(struct gfs2_sbd *sdp);
spin_unlock(&gl->gl_lockref.lock);
}
+extern void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation);
+extern bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation);
+
#endif /* __GLOCK_DOT_H__ */
memset(&tr, 0, sizeof(tr));
INIT_LIST_HEAD(&tr.tr_buf);
INIT_LIST_HEAD(&tr.tr_databuf);
+ INIT_LIST_HEAD(&tr.tr_ail1_list);
+ INIT_LIST_HEAD(&tr.tr_ail2_list);
tr.tr_revokes = atomic_read(&gl->gl_ail_count);
if (!tr.tr_revokes) {
struct gfs2_inode *ip = gfs2_glock2inode(gl);
int isreg = ip && S_ISREG(ip->i_inode.i_mode);
struct address_space *metamapping = gfs2_glock2aspace(gl);
- int error = 0;
+ int error = 0, ret;
if (isreg) {
if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
error = filemap_fdatawait(mapping);
mapping_set_error(mapping, error);
}
- error = filemap_fdatawait(metamapping);
- mapping_set_error(metamapping, error);
+ ret = filemap_fdatawait(metamapping);
+ mapping_set_error(metamapping, ret);
+ if (!error)
+ error = ret;
gfs2_ail_empty_gl(gl);
/*
* Writeback of the data mapping may cause the dirty flag to be set
if (gl->gl_demote_state == LM_ST_UNLOCKED &&
gl->gl_state == LM_ST_SHARED && ip) {
gl->gl_lockref.count++;
- if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
+ if (!queue_delayed_work(gfs2_delete_workqueue,
+ &gl->gl_delete, 0))
gl->gl_lockref.count--;
}
}
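+
+/*
+ * iopen_go_demote_ok - keep iopen glocks around while delete work is
+ * queued against them.
+ */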
+static int iopen_go_demote_ok(const struct gfs2_glock *gl)
+{
+ return !gfs2_delete_work_queued(gl);
+}
+
/**
* inode_go_free - wake up anyone waiting for dlm's unlock ast to free it
* @gl: glock being freed
.go_lock = inode_go_lock,
.go_dump = inode_go_dump,
.go_type = LM_TYPE_INODE,
- .go_flags = GLOF_ASPACE | GLOF_LRU,
+ .go_flags = GLOF_ASPACE | GLOF_LRU | GLOF_LVB,
.go_free = inode_go_free,
};
const struct gfs2_glock_operations gfs2_iopen_glops = {
.go_type = LM_TYPE_IOPEN,
.go_callback = iopen_go_callback,
+ .go_demote_ok = iopen_go_demote_ok,
.go_flags = GLOF_LRU | GLOF_NONDISK,
};
GLF_OBJECT = 14, /* Used only for tracing */
GLF_BLOCKING = 15,
GLF_INODE_CREATING = 16, /* Inode creation occurring */
+ GLF_PENDING_DELETE = 17,
GLF_FREEING = 18, /* Wait for glock to be freed */
};
atomic_t gl_revokes;
struct delayed_work gl_work;
union {
- /* For inode and iopen glocks only */
- struct work_struct gl_delete;
+ /* For iopen glocks only */
+ struct {
+ struct delayed_work gl_delete;
+ u64 gl_no_formal_ino;
+ };
/* For rgrp glocks only */
struct {
loff_t start;
GIF_ORDERED = 4,
GIF_FREE_VFS_INODE = 5,
GIF_GLOP_PENDING = 6,
+ GIF_DEFERRED_DELETE = 7,
};
struct gfs2_inode {
* placeholder because it doesn't otherwise make sense), the on-disk block type
* is verified to be @blktype.
*
+ * When @no_formal_ino is non-zero, this function will return ERR_PTR(-ESTALE)
+ * if it detects that @no_formal_ino doesn't match the actual inode generation
+ * number. However, it doesn't always know unless @type is DT_UNKNOWN.
+ *
* Returns: A VFS inode, or an error
*/
if (error)
goto fail;
+ error = -ESTALE;
+ if (no_formal_ino &&
+ gfs2_inode_already_deleted(ip->i_gl, no_formal_ino))
+ goto fail;
+
if (blktype != GFS2_BLKST_FREE) {
error = gfs2_check_blk_type(sdp, no_addr,
blktype);
error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
if (unlikely(error))
goto fail;
+ gfs2_cancel_delete_work(ip->i_iopen_gh.gh_gl);
glock_set_object(ip->i_iopen_gh.gh_gl, ip);
gfs2_glock_put(io_gl);
io_gl = NULL;
inode->i_mode = DT2IF(type);
}
+ if (gfs2_holder_initialized(&i_gh))
+ gfs2_glock_dq_uninit(&i_gh);
+
gfs2_set_iop(inode);
+ }
- unlock_new_inode(inode);
+ if (no_formal_ino && ip->i_no_formal_ino &&
+ no_formal_ino != ip->i_no_formal_ino) {
+ if (inode->i_state & I_NEW)
+ goto fail;
+ iput(inode);
+ return ERR_PTR(-ESTALE);
}
- if (gfs2_holder_initialized(&i_gh))
- gfs2_glock_dq_uninit(&i_gh);
+ if (inode->i_state & I_NEW)
+ unlock_new_inode(inode);
+
return inode;
fail:
return ERR_PTR(error);
}
+/**
+ * gfs2_lookup_by_inum - look up an inode by inode number
+ * @sdp: The super block
+ * @no_addr: The inode number
+ * @no_formal_ino: The inode generation number (0 for any)
+ * @blktype: Requested block type (see gfs2_inode_lookup)
+ */
struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
- u64 *no_formal_ino, unsigned int blktype)
+ u64 no_formal_ino, unsigned int blktype)
{
struct super_block *sb = sdp->sd_vfs;
struct inode *inode;
int error;
- inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0, blktype);
+ inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, no_formal_ino,
+ blktype);
if (IS_ERR(inode))
return inode;
- /* Two extra checks for NFS only */
if (no_formal_ino) {
- error = -ESTALE;
- if (GFS2_I(inode)->i_no_formal_ino != *no_formal_ino)
- goto fail_iput;
-
error = -EIO;
if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM)
goto fail_iput;
if (error)
goto fail_gunlock2;
+ gfs2_cancel_delete_work(ip->i_iopen_gh.gh_gl);
glock_set_object(ip->i_iopen_gh.gh_gl, ip);
gfs2_set_iop(inode);
insert_inode_hash(inode);
fail_free_inode:
if (ip->i_gl) {
glock_clear_object(ip->i_gl, ip);
- gfs2_glock_put(ip->i_gl);
+ if (free_vfs_inode) /* else evict will do the put for us */
+ gfs2_glock_put(ip->i_gl);
}
gfs2_rs_delete(ip, NULL);
gfs2_qa_put(ip);
u64 no_addr, u64 no_formal_ino,
unsigned int blktype);
extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
- u64 *no_formal_ino,
+ u64 no_formal_ino,
unsigned int blktype);
extern int gfs2_inode_refresh(struct gfs2_inode *ip);
#include "util.h"
#include "dir.h"
#include "trace_gfs2.h"
+#include "trans.h"
static void gfs2_log_shutdown(struct gfs2_sbd *sdp);
struct gfs2_bufdata *bd;
struct buffer_head *bh;
- fs_err(sdp, "Error: In gfs2_ail1_flush for ten minutes! t=%d\n",
- current->journal_info ? 1 : 0);
-
list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) {
list_for_each_entry_reverse(bd, &tr->tr_ail1_list,
bd_ail_st_list) {
restart:
ret = 0;
if (time_after(jiffies, flush_start + (HZ * 600))) {
+ fs_err(sdp, "Error: In %s for ten minutes! t=%d\n",
+ __func__, current->journal_info ? 1 : 0);
dump_ail_list(sdp);
goto out;
}
list_del(&tr->tr_list);
gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list));
gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list));
- kfree(tr);
+ gfs2_trans_free(sdp, tr);
}
spin_unlock(&sdp->sd_ail_lock);
gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail1_list);
gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
list_del(&tr->tr_list);
- kfree(tr);
+ gfs2_trans_free(sdp, tr);
}
while (!list_empty(&sdp->sd_ail2_list)) {
tr = list_first_entry(&sdp->sd_ail2_list, struct gfs2_trans,
tr_list);
gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
list_del(&tr->tr_list);
- kfree(tr);
+ gfs2_trans_free(sdp, tr);
}
spin_unlock(&sdp->sd_ail_lock);
}
/**
+ * empty_ail1_list - try to start IO and empty the ail1 list
+ * @sdp: Pointer to GFS2 superblock
+ */
+static void empty_ail1_list(struct gfs2_sbd *sdp)
+{
+ unsigned long start = jiffies;
+
+ for (;;) {
+ if (time_after(jiffies, start + (HZ * 600))) {
+ fs_err(sdp, "Error: In %s for 10 minutes! t=%d\n",
+ __func__, current->journal_info ? 1 : 0);
+ dump_ail_list(sdp);
+ return;
+ }
+ gfs2_ail1_start(sdp);
+ gfs2_ail1_wait(sdp);
+ if (gfs2_ail1_empty(sdp, 0))
+ return;
+ }
+}
+
+/**
* gfs2_log_flush - flush incore transaction(s)
* @sdp: the filesystem
* @gl: The glock structure to flush. If NULL, flush the whole incore log
tr = sdp->sd_log_tr;
if (tr) {
sdp->sd_log_tr = NULL;
- INIT_LIST_HEAD(&tr->tr_ail1_list);
- INIT_LIST_HEAD(&tr->tr_ail2_list);
tr->tr_first = sdp->sd_log_flush_head;
if (unlikely (state == SFS_FROZEN))
if (gfs2_assert_withdraw_delayed(sdp,
if (!(flags & GFS2_LOG_HEAD_FLUSH_NORMAL)) {
if (!sdp->sd_log_idle) {
- for (;;) {
- gfs2_ail1_start(sdp);
- gfs2_ail1_wait(sdp);
- if (gfs2_ail1_empty(sdp, 0))
- break;
- }
+ empty_ail1_list(sdp);
if (gfs2_withdrawn(sdp))
goto out;
atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
trace_gfs2_log_flush(sdp, 0, flags);
up_write(&sdp->sd_log_flush_lock);
- kfree(tr);
+ gfs2_trans_free(sdp, tr);
}
/**
* @new: New transaction to be merged
*/
-static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new)
+static void gfs2_merge_trans(struct gfs2_sbd *sdp, struct gfs2_trans *new)
{
+ struct gfs2_trans *old = sdp->sd_log_tr;
+
WARN_ON_ONCE(!test_bit(TR_ATTACHED, &old->tr_flags));
old->tr_num_buf_new += new->tr_num_buf_new;
list_splice_tail_init(&new->tr_databuf, &old->tr_databuf);
list_splice_tail_init(&new->tr_buf, &old->tr_buf);
+
+ spin_lock(&sdp->sd_ail_lock);
+ list_splice_tail_init(&new->tr_ail1_list, &old->tr_ail1_list);
+ list_splice_tail_init(&new->tr_ail2_list, &old->tr_ail2_list);
+ spin_unlock(&sdp->sd_ail_lock);
}
static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
gfs2_log_lock(sdp);
if (sdp->sd_log_tr) {
- gfs2_merge_trans(sdp->sd_log_tr, tr);
+ gfs2_merge_trans(sdp, tr);
} else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) {
gfs2_assert_withdraw(sdp, test_bit(TR_ALLOCED, &tr->tr_flags));
sdp->sd_log_tr = tr;
if (!gfs2_qadata_cachep)
goto fail_cachep7;
+ gfs2_trans_cachep = kmem_cache_create("gfs2_trans",
+ sizeof(struct gfs2_trans),
+ 0, 0, NULL);
+ if (!gfs2_trans_cachep)
+ goto fail_cachep8;
+
error = register_shrinker(&gfs2_qd_shrinker);
if (error)
goto fail_shrinker;
fail_fs1:
unregister_shrinker(&gfs2_qd_shrinker);
fail_shrinker:
+ kmem_cache_destroy(gfs2_trans_cachep);
+fail_cachep8:
kmem_cache_destroy(gfs2_qadata_cachep);
fail_cachep7:
kmem_cache_destroy(gfs2_quotad_cachep);
rcu_barrier();
mempool_destroy(gfs2_page_pool);
+ kmem_cache_destroy(gfs2_trans_cachep);
kmem_cache_destroy(gfs2_qadata_cachep);
kmem_cache_destroy(gfs2_quotad_cachep);
kmem_cache_destroy(gfs2_rgrpd_cachep);
}
static const match_table_t nolock_tokens = {
- { Opt_jid, "jid=%d\n", },
+ { Opt_jid, "jid=%d", },
{ Opt_err, NULL },
};
*/
ip = gl->gl_object;
- if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
+ if (ip || !gfs2_queue_delete_work(gl, 0))
gfs2_glock_put(gl);
else
found++;
}
}
- flush_workqueue(gfs2_delete_workqueue);
+ gfs2_flush_delete_work(sdp);
if (!log_write_allowed && current == sdp->sd_quotad_process)
fs_warn(sdp, "The quotad daemon is withdrawing.\n");
else if (sdp->sd_quotad_process)
struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
gfs2_glock_hold(gl);
- if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
+ if (!gfs2_queue_delete_work(gl, 0))
gfs2_glock_queue_put(gl);
return false;
}
gfs2_glock_put(gl);
}
+static bool gfs2_upgrade_iopen_glock(struct inode *inode)
+{
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
+ struct gfs2_holder *gh = &ip->i_iopen_gh;
+ long timeout = 5 * HZ;
+ int error;
+
+ gh->gh_flags |= GL_NOCACHE;
+ gfs2_glock_dq_wait(gh);
+
+ /*
+ * If there are no other lock holders, we'll get the lock immediately.
+ * Otherwise, the other nodes holding the lock will be notified about
+ * our locking request. If they don't have the inode open, they'll
+ * evict the cached inode and release the lock. Otherwise, if they
+ * poke the inode glock, we'll take this as an indication that they
+ * still need the iopen glock and that they'll take care of deleting
+ * the inode when they're done. As a last resort, if another node
+ * keeps holding the iopen glock without showing any activity on the
+ * inode glock, we'll eventually time out.
+ *
+ * Note that we're passing the LM_FLAG_TRY_1CB flag to the first
+ * locking request as an optimization to notify lock holders as soon as
+ * possible. Without that flag, they'd be notified implicitly by the
+ * second locking request.
+ */
+
+ gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, gh);
+ error = gfs2_glock_nq(gh);
+ if (error != GLR_TRYFAILED)
+ return !error;
+
+ gfs2_holder_reinit(LM_ST_EXCLUSIVE, GL_ASYNC | GL_NOCACHE, gh);
+ error = gfs2_glock_nq(gh);
+ if (error)
+ return false;
+
+ timeout = wait_event_interruptible_timeout(sdp->sd_async_glock_wait,
+ !test_bit(HIF_WAIT, &gh->gh_iflags) ||
+ test_bit(GLF_DEMOTE, &ip->i_gl->gl_flags),
+ timeout);
+ if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) {
+ gfs2_glock_dq(gh);
+ return false;
+ }
+ return true;
+}
+
/**
* gfs2_evict_inode - Remove an inode from cache
* @inode: The inode to evict
if (test_bit(GIF_ALLOC_FAILED, &ip->i_flags)) {
BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));
gfs2_holder_mark_uninitialized(&gh);
- goto alloc_failed;
+ goto out_delete;
}
+ if (test_bit(GIF_DEFERRED_DELETE, &ip->i_flags))
+ goto out;
+
/* Deletes should never happen under memory pressure anymore. */
if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
goto out;
goto out;
}
+ if (gfs2_inode_already_deleted(ip->i_gl, ip->i_no_formal_ino))
+ goto out_truncate;
error = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED);
if (error)
goto out_truncate;
if (inode->i_nlink)
goto out_truncate;
-alloc_failed:
+out_delete:
if (gfs2_holder_initialized(&ip->i_iopen_gh) &&
test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
- ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
- gfs2_glock_dq_wait(&ip->i_iopen_gh);
- gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE,
- &ip->i_iopen_gh);
- error = gfs2_glock_nq(&ip->i_iopen_gh);
- if (error)
+ if (!gfs2_upgrade_iopen_glock(inode)) {
+ gfs2_holder_uninit(&ip->i_iopen_gh);
goto out_truncate;
+ }
}
if (S_ISDIR(inode->i_mode) &&
that subsequent inode creates don't see an old gl_object. */
glock_clear_object(ip->i_gl, ip);
error = gfs2_dinode_dealloc(ip);
+ gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino);
goto out_unlock;
out_truncate:
if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
return -EROFS;
- tr = kzalloc(sizeof(struct gfs2_trans), GFP_NOFS);
+ tr = kmem_cache_zalloc(gfs2_trans_cachep, GFP_NOFS);
if (!tr)
return -ENOMEM;
tr->tr_reserved += gfs2_struct2blk(sdp, revokes);
INIT_LIST_HEAD(&tr->tr_databuf);
INIT_LIST_HEAD(&tr->tr_buf);
+ INIT_LIST_HEAD(&tr->tr_ail1_list);
+ INIT_LIST_HEAD(&tr->tr_ail2_list);
sb_start_intwrite(sdp->sd_vfs);
fail:
sb_end_intwrite(sdp->sd_vfs);
- kfree(tr);
+ kmem_cache_free(gfs2_trans_cachep, tr);
return error;
}
if (!test_bit(TR_TOUCHED, &tr->tr_flags)) {
gfs2_log_release(sdp, tr->tr_reserved);
if (alloced) {
- kfree(tr);
+ gfs2_trans_free(sdp, tr);
sb_end_intwrite(sdp->sd_vfs);
}
return;
gfs2_log_commit(sdp, tr);
if (alloced && !test_bit(TR_ATTACHED, &tr->tr_flags))
- kfree(tr);
+ gfs2_trans_free(sdp, tr);
up_read(&sdp->sd_log_flush_lock);
if (sdp->sd_vfs->s_flags & SB_SYNCHRONOUS)
gfs2_log_unlock(sdp);
}
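+
+/*
+ * gfs2_trans_free - free a transaction, warning if any of its buffer or
+ * ail lists have not been emptied.
+ */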
+void gfs2_trans_free(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
+{
+ if (tr == NULL)
+ return;
+
+ gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list));
+ gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list));
+ gfs2_assert_warn(sdp, list_empty(&tr->tr_databuf));
+ gfs2_assert_warn(sdp, list_empty(&tr->tr_buf));
+ kmem_cache_free(gfs2_trans_cachep, tr);
+}
extern void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh);
extern void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
extern void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len);
+extern void gfs2_trans_free(struct gfs2_sbd *sdp, struct gfs2_trans *tr);
#endif /* __TRANS_DOT_H__ */
struct kmem_cache *gfs2_rgrpd_cachep __read_mostly;
struct kmem_cache *gfs2_quotad_cachep __read_mostly;
struct kmem_cache *gfs2_qadata_cachep __read_mostly;
+struct kmem_cache *gfs2_trans_cachep __read_mostly;
mempool_t *gfs2_page_pool __read_mostly;
void gfs2_assert_i(struct gfs2_sbd *sdp)
extern struct kmem_cache *gfs2_rgrpd_cachep;
extern struct kmem_cache *gfs2_quotad_cachep;
extern struct kmem_cache *gfs2_qadata_cachep;
+extern struct kmem_cache *gfs2_trans_cachep;
extern mempool_t *gfs2_page_pool;
extern struct workqueue_struct *gfs2_control_wq;
#define GFS2_RGF_NOALLOC 0x00000008
#define GFS2_RGF_TRIMMED 0x00000010
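+
+/*
+ * Inode lock value block: ri_generation_deleted records the highest inode
+ * generation number for which a delete has already been carried out.
+ */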
+struct gfs2_inode_lvb {
+ __be32 ri_magic;
+ __be32 __pad;
+ __be64 ri_generation_deleted;
+};
+
struct gfs2_rgrp_lvb {
__be32 rl_magic;
__be32 rl_flags;