Merge tag 'erofs-for-5.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs

author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 30 Mar 2020 19:49:33 +0000 (12:49 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 30 Mar 2020 19:49:33 +0000 (12:49 -0700)
diff --git a/MAINTAINERS b/MAINTAINERS

index 953478c..bbec693 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6248,7 +6248,7 @@ F:        drivers/video/fbdev/s1d13xxxfb.c
  F:     include/video/s1d13xxxfb.h
  
  EROFS FILE SYSTEM
-M:     Gao Xiang <gaoxiang25@huawei.com>
+M:     Gao Xiang <xiang@kernel.org>
  M:     Chao Yu <yuchao0@huawei.com>
  L:     linux-erofs@lists.ozlabs.org
  S:     Maintained
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c

index 5779a15..5d2d819 100644 (file)
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -157,17 +157,27 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
                 }
         }
  
-       ret = LZ4_decompress_safe_partial(src + inputmargin, out,
-                                         inlen, rq->outputsize,
-                                         rq->outputsize);
-       if (ret < 0) {
-               erofs_err(rq->sb, "failed to decompress, in[%u, %u] out[%u]",
-                         inlen, inputmargin, rq->outputsize);
+       /* legacy format could compress extra data in a pcluster. */
+       if (rq->partial_decoding || !support_0padding)
+               ret = LZ4_decompress_safe_partial(src + inputmargin, out,
+                                                 inlen, rq->outputsize,
+                                                 rq->outputsize);
+       else
+               ret = LZ4_decompress_safe(src + inputmargin, out,
+                                         inlen, rq->outputsize);
+
+       if (ret != rq->outputsize) {
+               erofs_err(rq->sb, "failed to decompress %d in[%u, %u] out[%u]",
+                         ret, inlen, inputmargin, rq->outputsize);
+
                 WARN_ON(1);
                 print_hex_dump(KERN_DEBUG, "[ in]: ", DUMP_PREFIX_OFFSET,
                                16, 1, src + inputmargin, inlen, true);
                 print_hex_dump(KERN_DEBUG, "[out]: ", DUMP_PREFIX_OFFSET,
                                16, 1, out, rq->outputsize, true);
+
+               if (ret >= 0)
+                       memset(out + ret, 0, rq->outputsize - ret);
                 ret = -EIO;
         }
  
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h

index c4c6dcd..5eead7f 100644 (file)
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -52,8 +52,8 @@ struct erofs_sb_info {
         struct list_head list;
         struct mutex umount_mutex;
  
-       /* the dedicated workstation for compression */
-       struct radix_tree_root workstn_tree;
+       /* managed XArray arranged in physical block number */
+       struct xarray managed_pslots;
  
         /* threshold for decompression synchronously */
         unsigned int max_sync_decompress_pages;
@@ -402,8 +402,8 @@ static inline void *erofs_get_pcpubuf(unsigned int pagenr)
  int erofs_workgroup_put(struct erofs_workgroup *grp);
  struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
                                              pgoff_t index);
-int erofs_register_workgroup(struct super_block *sb,
-                            struct erofs_workgroup *grp);
+struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
+                                              struct erofs_workgroup *grp);
  void erofs_workgroup_free_rcu(struct erofs_workgroup *grp);
  void erofs_shrinker_register(struct super_block *sb);
  void erofs_shrinker_unregister(struct super_block *sb);
diff --git a/fs/erofs/super.c b/fs/erofs/super.c

index 057e6d7..b514c67 100644 (file)
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -425,7 +425,7 @@ static int erofs_fill_super(struct super_block *sb, void *data, int silent)
                 sb->s_flags &= ~SB_POSIXACL;
  
  #ifdef CONFIG_EROFS_FS_ZIP
-       INIT_RADIX_TREE(&sbi->workstn_tree, GFP_ATOMIC);
+       xa_init(&sbi->managed_pslots);
  #endif
  
         /* get the root inode */
diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c

index fddc505..52d0be1 100644 (file)
--- a/fs/erofs/utils.c
+++ b/fs/erofs/utils.c
@@ -37,9 +37,6 @@ void *erofs_get_pcpubuf(unsigned int pagenr)
  /* global shrink count (for all mounted EROFS instances) */
  static atomic_long_t erofs_global_shrink_cnt;
  
-#define __erofs_workgroup_get(grp)     atomic_inc(&(grp)->refcount)
-#define __erofs_workgroup_put(grp)     atomic_dec(&(grp)->refcount)
-
  static int erofs_workgroup_get(struct erofs_workgroup *grp)
  {
         int o;
@@ -66,7 +63,7 @@ struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
  
  repeat:
         rcu_read_lock();
-       grp = radix_tree_lookup(&sbi->workstn_tree, index);
+       grp = xa_load(&sbi->managed_pslots, index);
         if (grp) {
                 if (erofs_workgroup_get(grp)) {
                         /* prefer to relax rcu read side */
@@ -80,43 +77,37 @@ repeat:
         return grp;
  }
  
-int erofs_register_workgroup(struct super_block *sb,
-                            struct erofs_workgroup *grp)
+struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
+                                              struct erofs_workgroup *grp)
  {
-       struct erofs_sb_info *sbi;
-       int err;
-
-       /* grp shouldn't be broken or used before */
-       if (atomic_read(&grp->refcount) != 1) {
-               DBG_BUGON(1);
-               return -EINVAL;
-       }
-
-       err = radix_tree_preload(GFP_NOFS);
-       if (err)
-               return err;
-
-       sbi = EROFS_SB(sb);
-       xa_lock(&sbi->workstn_tree);
+       struct erofs_sb_info *const sbi = EROFS_SB(sb);
+       struct erofs_workgroup *pre;
  
         /*
-        * Bump up reference count before making this workgroup
-        * visible to other users in order to avoid potential UAF
-        * without serialized by workstn_lock.
+        * Bump up a reference count before making this visible
+        * to others for the XArray in order to avoid potential
+        * UAF without serialized by xa_lock.
          */
-       __erofs_workgroup_get(grp);
-
-       err = radix_tree_insert(&sbi->workstn_tree, grp->index, grp);
-       if (err)
-               /*
-                * it's safe to decrease since the workgroup isn't visible
-                * and refcount >= 2 (cannot be freezed).
-                */
-               __erofs_workgroup_put(grp);
+       atomic_inc(&grp->refcount);
  
-       xa_unlock(&sbi->workstn_tree);
-       radix_tree_preload_end();
-       return err;
+repeat:
+       xa_lock(&sbi->managed_pslots);
+       pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
+                          NULL, grp, GFP_NOFS);
+       if (pre) {
+               if (xa_is_err(pre)) {
+                       pre = ERR_PTR(xa_err(pre));
+               } else if (erofs_workgroup_get(pre)) {
+                       /* try to legitimize the current in-tree one */
+                       xa_unlock(&sbi->managed_pslots);
+                       cond_resched();
+                       goto repeat;
+               }
+               atomic_dec(&grp->refcount);
+               grp = pre;
+       }
+       xa_unlock(&sbi->managed_pslots);
+       return grp;
  }
  
  static void  __erofs_workgroup_free(struct erofs_workgroup *grp)
@@ -155,7 +146,7 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
  
         /*
          * Note that all cached pages should be unattached
-        * before deleted from the radix tree. Otherwise some
+        * before deleted from the XArray. Otherwise some
          * cached pages could be still attached to the orphan
          * old workgroup when the new one is available in the tree.
          */
@@ -169,7 +160,7 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
          * however in order to avoid some race conditions, add a
          * DBG_BUGON to observe this in advance.
          */
-       DBG_BUGON(radix_tree_delete(&sbi->workstn_tree, grp->index) != grp);
+       DBG_BUGON(xa_erase(&sbi->managed_pslots, grp->index) != grp);
  
         /*
          * If managed cache is on, last refcount should indicate
@@ -182,22 +173,11 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
  static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
                                               unsigned long nr_shrink)
  {
-       pgoff_t first_index = 0;
-       void *batch[PAGEVEC_SIZE];
+       struct erofs_workgroup *grp;
         unsigned int freed = 0;
+       unsigned long index;
  
-       int i, found;
-repeat:
-       xa_lock(&sbi->workstn_tree);
-
-       found = radix_tree_gang_lookup(&sbi->workstn_tree,
-                                      batch, first_index, PAGEVEC_SIZE);
-
-       for (i = 0; i < found; ++i) {
-               struct erofs_workgroup *grp = batch[i];
-
-               first_index = grp->index + 1;
-
+       xa_for_each(&sbi->managed_pslots, index, grp) {
                 /* try to shrink each valid workgroup */
                 if (!erofs_try_to_release_workgroup(sbi, grp))
                         continue;
@@ -206,10 +186,6 @@ repeat:
                 if (!--nr_shrink)
                         break;
         }
-       xa_unlock(&sbi->workstn_tree);
-
-       if (i && nr_shrink)
-               goto repeat;
         return freed;
  }
  
@@ -286,7 +262,7 @@ static unsigned long erofs_shrink_scan(struct shrinker *shrink,
                 spin_unlock(&erofs_sb_list_lock);
                 sbi->shrinker_run_no = run_no;
  
-               freed += erofs_shrink_workstation(sbi, nr);
+               freed += erofs_shrink_workstation(sbi, nr - freed);
  
                 spin_lock(&erofs_sb_list_lock);
                 /* Get the next list element before we move this one */
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c

index 80e47f0..c4b6c9a 100644 (file)
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -67,16 +67,6 @@ static void z_erofs_pcluster_init_once(void *ptr)
                 pcl->compressed_pages[i] = NULL;
  }
  
-static void z_erofs_pcluster_init_always(struct z_erofs_pcluster *pcl)
-{
-       struct z_erofs_collection *cl = z_erofs_primarycollection(pcl);
-
-       atomic_set(&pcl->obj.refcount, 1);
-
-       DBG_BUGON(cl->nr_pages);
-       DBG_BUGON(cl->vcnt);
-}
-
  int __init z_erofs_init_zip_subsystem(void)
  {
         pcluster_cachep = kmem_cache_create("erofs_compress",
@@ -341,26 +331,19 @@ static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
                                      struct inode *inode,
                                      struct erofs_map_blocks *map)
  {
-       struct erofs_workgroup *grp;
-       struct z_erofs_pcluster *pcl;
+       struct z_erofs_pcluster *pcl = clt->pcl;
         struct z_erofs_collection *cl;
         unsigned int length;
  
-       grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT);
-       if (!grp)
-               return -ENOENT;
-
-       pcl = container_of(grp, struct z_erofs_pcluster, obj);
+       /* to avoid unexpected loop formed by corrupted images */
         if (clt->owned_head == &pcl->next || pcl == clt->tailpcl) {
                 DBG_BUGON(1);
-               erofs_workgroup_put(grp);
                 return -EFSCORRUPTED;
         }
  
         cl = z_erofs_primarycollection(pcl);
         if (cl->pageofs != (map->m_la & ~PAGE_MASK)) {
                 DBG_BUGON(1);
-               erofs_workgroup_put(grp);
                 return -EFSCORRUPTED;
         }
  
@@ -368,7 +351,6 @@ static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
         if (length & Z_EROFS_PCLUSTER_FULL_LENGTH) {
                 if ((map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) > length) {
                         DBG_BUGON(1);
-                       erofs_workgroup_put(grp);
                         return -EFSCORRUPTED;
                 }
         } else {
@@ -391,7 +373,6 @@ static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
         /* clean tailpcl if the current owned_head is Z_EROFS_PCLUSTER_TAIL */
         if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
                 clt->tailpcl = NULL;
-       clt->pcl = pcl;
         clt->cl = cl;
         return 0;
  }
@@ -402,6 +383,7 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
  {
         struct z_erofs_pcluster *pcl;
         struct z_erofs_collection *cl;
+       struct erofs_workgroup *grp;
         int err;
  
         /* no available workgroup, let's allocate one */
@@ -409,7 +391,7 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
         if (!pcl)
                 return -ENOMEM;
  
-       z_erofs_pcluster_init_always(pcl);
+       atomic_set(&pcl->obj.refcount, 1);
         pcl->obj.index = map->m_pa >> PAGE_SHIFT;
  
         pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) |
@@ -429,19 +411,29 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
         clt->mode = COLLECT_PRIMARY_FOLLOWED;
  
         cl = z_erofs_primarycollection(pcl);
+
+       /* must be cleaned before freeing to slab */
+       DBG_BUGON(cl->nr_pages);
+       DBG_BUGON(cl->vcnt);
+
         cl->pageofs = map->m_la & ~PAGE_MASK;
  
         /*
          * lock all primary followed works before visible to others
          * and mutex_trylock *never* fails for a new pcluster.
          */
-       mutex_trylock(&cl->lock);
+       DBG_BUGON(!mutex_trylock(&cl->lock));
  
-       err = erofs_register_workgroup(inode->i_sb, &pcl->obj);
-       if (err) {
-               mutex_unlock(&cl->lock);
-               kmem_cache_free(pcluster_cachep, pcl);
-               return -EAGAIN;
+       grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj);
+       if (IS_ERR(grp)) {
+               err = PTR_ERR(grp);
+               goto err_out;
+       }
+
+       if (grp != &pcl->obj) {
+               clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
+               err = -EEXIST;
+               goto err_out;
         }
         /* used to check tail merging loop due to corrupted images */
         if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
@@ -450,12 +442,18 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
         clt->pcl = pcl;
         clt->cl = cl;
         return 0;
+
+err_out:
+       mutex_unlock(&cl->lock);
+       kmem_cache_free(pcluster_cachep, pcl);
+       return err;
  }
  
  static int z_erofs_collector_begin(struct z_erofs_collector *clt,
                                    struct inode *inode,
                                    struct erofs_map_blocks *map)
  {
+       struct erofs_workgroup *grp;
         int ret;
  
         DBG_BUGON(clt->cl);
@@ -469,21 +467,25 @@ static int z_erofs_collector_begin(struct z_erofs_collector *clt,
                 return -EINVAL;
         }
  
-repeat:
-       ret = z_erofs_lookup_collection(clt, inode, map);
-       if (ret == -ENOENT) {
+       grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT);
+       if (grp) {
+               clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
+       } else {
                 ret = z_erofs_register_collection(clt, inode, map);
  
-               /* someone registered at the same time, give another try */
-               if (ret == -EAGAIN) {
-                       cond_resched();
-                       goto repeat;
-               }
+               if (!ret)
+                       goto out;
+               if (ret != -EEXIST)
+                       return ret;
         }
  
-       if (ret)
+       ret = z_erofs_lookup_collection(clt, inode, map);
+       if (ret) {
+               erofs_workgroup_put(&clt->pcl->obj);
                 return ret;
+       }
  
+out:
         z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS,
                                   clt->cl->pagevec, clt->cl->vcnt);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 30 Mar 2020 19:49:33 +0000 (12:49 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 30 Mar 2020 19:49:33 +0000 (12:49 -0700)
MAINTAINERS		patch \| blob \| history
fs/erofs/decompressor.c		patch \| blob \| history
fs/erofs/internal.h		patch \| blob \| history
fs/erofs/super.c		patch \| blob \| history
fs/erofs/utils.c		patch \| blob \| history
fs/erofs/zdata.c		patch \| blob \| history