EROFS_ZIP_CACHE_READAROUND
};
-#define EROFS_LOCKED_MAGIC (INT_MIN | 0xE0F510CCL)
-
/* basic unit of the workstation of a super_block */
struct erofs_workgroup {
- /* the workgroup index in the workstation */
pgoff_t index;
-
- /* overall workgroup reference count */
- atomic_t refcount;
+ struct lockref lockref;
};
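For context, struct lockref from <linux/lockref.h> packs the spinlock and the reference count into one machine word so that the common get/put paths can use a lockless cmpxchg, with only contended or zero-count cases falling back to the spinlock. A simplified view of the fields relied upon here (illustrative sketch only; the cmpxchg configuration details are omitted):

/* Illustrative sketch (not from the patch): simplified layout of struct lockref. */
#include <linux/spinlock.h>
#include <linux/types.h>

struct lockref_sketch {
	union {
		aligned_u64 lock_count;		/* lock + count updated as one word */
		struct {
			spinlock_t lock;
			int count;		/* goes negative once marked dead */
		};
	};
};

Under the new scheme a workgroup whose count is 0 is still live but referenced only by the managed XArray; that is exactly the state the shrinker paths below test for.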
-static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp,
- int val)
-{
- preempt_disable();
- if (val != atomic_cmpxchg(&grp->refcount, val, EROFS_LOCKED_MAGIC)) {
- preempt_enable();
- return false;
- }
- return true;
-}
-
-static inline void erofs_workgroup_unfreeze(struct erofs_workgroup *grp,
- int orig_val)
-{
- /*
- * other observers should notice all modifications
- * in the freezing period.
- */
- smp_mb();
- atomic_set(&grp->refcount, orig_val);
- preempt_enable();
-}
-
-static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
-{
- return atomic_cond_read_relaxed(&grp->refcount,
- VAL != EROFS_LOCKED_MAGIC);
-}
-
enum erofs_kmap_type {
EROFS_NO_KMAP, /* don't map the buffer */
EROFS_KMAP, /* use kmap_local_page() to map the buffer */
void erofs_release_pages(struct page **pagepool);
#ifdef CONFIG_EROFS_FS_ZIP
-int erofs_workgroup_put(struct erofs_workgroup *grp);
+void erofs_workgroup_put(struct erofs_workgroup *grp);
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
pgoff_t index);
struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
* https://www.huawei.com/
*/
#include "internal.h"
-#include <linux/pagevec.h>
struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp)
{
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;
-static int erofs_workgroup_get(struct erofs_workgroup *grp)
+static bool erofs_workgroup_get(struct erofs_workgroup *grp)
{
- int o;
+ if (lockref_get_not_zero(&grp->lockref))
+ return true;
-repeat:
- o = erofs_wait_on_workgroup_freezed(grp);
- if (o <= 0)
- return -1;
-
- if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
- goto repeat;
+ spin_lock(&grp->lockref.lock);
+ if (__lockref_is_dead(&grp->lockref)) {
+ spin_unlock(&grp->lockref.lock);
+ return false;
+ }
- /* decrease refcount paired by erofs_workgroup_put */
- if (o == 1)
+ if (!grp->lockref.count++)
atomic_long_dec(&erofs_global_shrink_cnt);
- return 0;
+ spin_unlock(&grp->lockref.lock);
+ return true;
}
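The fast path above is lockref_get_not_zero(), which only succeeds while the count is positive; because an idle cached workgroup now sits at count 0, the slow path takes the spinlock itself, rejects dead entries, and on a 0 -> 1 transition drops the workgroup from erofs_global_shrink_cnt since it is no longer reclaimable. A rough model of the primitive's semantics (spinlock slow path only; illustrative sketch, not from the patch):

/* Illustrative sketch: models the semantics of lockref_get_not_zero(). */
#include <linux/lockref.h>

static bool sketch_lockref_get_not_zero(struct lockref *lockref)
{
	bool retval = false;

	spin_lock(&lockref->lock);
	if (lockref->count > 0) {	/* fails for dead (< 0) and idle (== 0) entries */
		lockref->count++;
		retval = true;
	}
	spin_unlock(&lockref->lock);
	return retval;
}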
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
rcu_read_lock();
grp = xa_load(&sbi->managed_pslots, index);
if (grp) {
- if (erofs_workgroup_get(grp)) {
+ if (!erofs_workgroup_get(grp)) {
/* prefer to relax rcu read side */
rcu_read_unlock();
goto repeat;
struct erofs_workgroup *pre;
/*
- * Bump up a reference count before making this visible
- * to others for the XArray in order to avoid potential
- * UAF without serialized by xa_lock.
+ * Bump up the reference before making this visible to others in the
+ * XArray, in order to avoid a potential UAF since lookups are not
+ * serialized by xa_lock.
*/
- atomic_inc(&grp->refcount);
+ lockref_get(&grp->lockref);
repeat:
xa_lock(&sbi->managed_pslots);
if (pre) {
if (xa_is_err(pre)) {
pre = ERR_PTR(xa_err(pre));
- } else if (erofs_workgroup_get(pre)) {
+ } else if (!erofs_workgroup_get(pre)) {
/* try to legitimize the current in-tree one */
xa_unlock(&sbi->managed_pslots);
cond_resched();
goto repeat;
}
- atomic_dec(&grp->refcount);
+ lockref_put_return(&grp->lockref);
grp = pre;
}
xa_unlock(&sbi->managed_pslots);
erofs_workgroup_free_rcu(grp);
}
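The lockref_get() used in erofs_insert_workgroup() above is the unconditional variant: it does not care whether the count is currently zero, it only needs the workgroup pinned before it becomes findable. Roughly, its slow path amounts to (illustrative sketch, not from the patch):

/* Illustrative sketch: models the semantics of lockref_get(). */
#include <linux/lockref.h>

static void sketch_lockref_get(struct lockref *lockref)
{
	spin_lock(&lockref->lock);
	lockref->count++;	/* no liveness check: the caller already owns the object */
	spin_unlock(&lockref->lock);
}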
-int erofs_workgroup_put(struct erofs_workgroup *grp)
+void erofs_workgroup_put(struct erofs_workgroup *grp)
{
- int count = atomic_dec_return(&grp->refcount);
+ if (lockref_put_or_lock(&grp->lockref))
+ return;
- if (count == 1)
+ DBG_BUGON(__lockref_is_dead(&grp->lockref));
+ if (grp->lockref.count == 1)
atomic_long_inc(&erofs_global_shrink_cnt);
- else if (!count)
- __erofs_workgroup_free(grp);
- return count;
+ --grp->lockref.count;
+ spin_unlock(&grp->lockref.lock);
}
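erofs_workgroup_put() mirrors the get side: lockref_put_or_lock() handles the common count > 1 case locklessly, and when it returns false the lock is already held, so the function completes the 1 -> 0 transition by hand and leaves the workgroup in the XArray as a shrinkable, zero-count entry. Its semantics are roughly (illustrative sketch, not from the patch):

/* Illustrative sketch: models the semantics of lockref_put_or_lock(). */
#include <linux/lockref.h>

static bool sketch_lockref_put_or_lock(struct lockref *lockref)
{
	spin_lock(&lockref->lock);
	if (lockref->count <= 1)
		return false;	/* lock stays held; the caller finishes the put */
	lockref->count--;
	spin_unlock(&lockref->lock);
	return true;
}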
static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
struct erofs_workgroup *grp)
{
- /*
- * If managed cache is on, refcount of workgroups
- * themselves could be < 0 (freezed). In other words,
- * there is no guarantee that all refcounts > 0.
- */
- if (!erofs_workgroup_try_to_freeze(grp, 1))
- return false;
+ bool free = false;
+
+ spin_lock(&grp->lockref.lock);
+ if (grp->lockref.count)
+ goto out;
/*
- * Note that all cached pages should be unattached
- * before deleted from the XArray. Otherwise some
- * cached pages could be still attached to the orphan
- * old workgroup when the new one is available in the tree.
+ * Note that all cached pages should be detached before being deleted
+ * from the XArray. Otherwise some cached pages could still be attached
+ * to the orphan old workgroup when the new one is available in the tree.
*/
- if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
- erofs_workgroup_unfreeze(grp, 1);
- return false;
- }
+ if (erofs_try_to_free_all_cached_pages(sbi, grp))
+ goto out;
/*
* It's impossible to fail after the workgroup is frozen; the DBG_BUGON
* below only catches unexpected races in advance.
*/
DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp);
- /* last refcount should be connected with its managed pslot. */
- erofs_workgroup_unfreeze(grp, 0);
- __erofs_workgroup_free(grp);
- return true;
+ lockref_mark_dead(&grp->lockref);
+ free = true;
+out:
+ spin_unlock(&grp->lockref.lock);
+ if (free)
+ __erofs_workgroup_free(grp);
+ return free;
}
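Reclaim is where the lockref fully replaces the old freeze trick: with the lock held and the count known to be zero, lockref_mark_dead() poisons the count so that any concurrent erofs_workgroup_get() sees __lockref_is_dead() and backs off, after which the workgroup can be erased from the XArray and freed outside the lock. Conceptually (illustrative sketch, not from the patch; SKETCH_LOCKREF_DEAD stands in for the real negative sentinel):

/* Illustrative sketch: models lockref_mark_dead() / __lockref_is_dead(). */
#include <linux/lockref.h>

#define SKETCH_LOCKREF_DEAD	(-128)

static void sketch_lockref_mark_dead(struct lockref *lockref)
{
	/* the caller holds lockref->lock, as erofs_try_to_release_workgroup() does */
	lockref->count = SKETCH_LOCKREF_DEAD;
}

static bool sketch_lockref_is_dead(const struct lockref *lockref)
{
	return lockref->count < 0;	/* any negative count means "being torn down" */
}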
static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
/*
- * refcount of workgroup is now freezed as 1,
+ * refcount of the workgroup is now frozen at 0,
* therefore no need to worry about available decompression users.
*/
for (i = 0; i < pcl->pclusterpages; ++i) {
if (!folio_test_private(folio))
return true;
- if (!erofs_workgroup_try_to_freeze(&pcl->obj, 1))
- return false;
-
ret = false;
+ spin_lock(&pcl->obj.lockref.lock);
+ if (pcl->obj.lockref.count > 0)
+ goto out;
+
DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
for (i = 0; i < pcl->pclusterpages; ++i) {
if (pcl->compressed_bvecs[i].page == &folio->page) {
break;
}
}
- erofs_workgroup_unfreeze(&pcl->obj, 1);
-
if (ret)
folio_detach_private(folio);
+out:
+ spin_unlock(&pcl->obj.lockref.lock);
return ret;
}
if (IS_ERR(pcl))
return PTR_ERR(pcl);
- atomic_set(&pcl->obj.refcount, 1);
+ spin_lock_init(&pcl->obj.lockref.lock);
pcl->algorithmformat = map->m_algorithmformat;
pcl->length = 0;
pcl->partial = true;