// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 */
#include "internal.h"

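/*
 * Grab a page from the caller-provided page pool, or fall back to
 * alloc_page() if the pool is empty. Pooled pages are singly chained
 * through their ->private field.
 */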
struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp)
{
        struct page *page = *pagepool;

        if (page) {
                DBG_BUGON(page_ref_count(page) != 1);
                *pagepool = (struct page *)page_private(page);
        } else {
                page = alloc_page(gfp);
        }
        return page;
}

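/* Return every page left in the pool back to the page allocator. */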
void erofs_release_pages(struct page **pagepool)
{
        while (*pagepool) {
                struct page *page = *pagepool;

                *pagepool = (struct page *)page_private(page);
                put_page(page);
        }
}

#ifdef CONFIG_EROFS_FS_ZIP
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

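/*
 * Try to take a reference on a workgroup. This fails only if the
 * workgroup has already been marked dead; resurrecting a zero-reference
 * workgroup also removes it from the global shrink count, since it is
 * no longer reclaimable.
 */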
static bool erofs_workgroup_get(struct erofs_workgroup *grp)
{
        if (lockref_get_not_zero(&grp->lockref))
                return true;

        spin_lock(&grp->lockref.lock);
        if (__lockref_is_dead(&grp->lockref)) {
                spin_unlock(&grp->lockref.lock);
                return false;
        }

        if (!grp->lockref.count++)
                atomic_long_dec(&erofs_global_shrink_cnt);
        spin_unlock(&grp->lockref.lock);
        return true;
}

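/*
 * Look up a workgroup by its index under RCU; retry the lookup if the
 * in-tree workgroup is dying and a reference cannot be taken.
 */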
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
                                             pgoff_t index)
{
        struct erofs_sb_info *sbi = EROFS_SB(sb);
        struct erofs_workgroup *grp;

repeat:
        rcu_read_lock();
        grp = xa_load(&sbi->managed_pslots, index);
        if (grp) {
                if (!erofs_workgroup_get(grp)) {
                        /* prefer to relax rcu read side */
                        rcu_read_unlock();
                        goto repeat;
                }

                DBG_BUGON(index != grp->index);
        }
        rcu_read_unlock();
        return grp;
}

struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
                                               struct erofs_workgroup *grp)
{
        struct erofs_sb_info *const sbi = EROFS_SB(sb);
        struct erofs_workgroup *pre;

        /*
         * Bump up the reference count before making this workgroup visible
         * to others in the XArray, in order to avoid potential UAF for
         * lookups that are not serialized by xa_lock.
         */
        lockref_get(&grp->lockref);

repeat:
        xa_lock(&sbi->managed_pslots);
        pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
                           NULL, grp, GFP_NOFS);
        if (pre) {
                if (xa_is_err(pre)) {
                        pre = ERR_PTR(xa_err(pre));
                } else if (!erofs_workgroup_get(pre)) {
                        /* try to legitimize the current in-tree one */
                        xa_unlock(&sbi->managed_pslots);
                        cond_resched();
                        goto repeat;
                }
                lockref_put_return(&grp->lockref);
                grp = pre;
        }
        xa_unlock(&sbi->managed_pslots);
        return grp;
}

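/* Free a dead workgroup via RCU and drop it from the global shrink count. */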
static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
        atomic_long_dec(&erofs_global_shrink_cnt);
        erofs_workgroup_free_rcu(grp);
}

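/*
 * Drop a reference on a workgroup; once the last reference is gone, bump
 * the global shrink count so the shrinker may reclaim the workgroup.
 */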
void erofs_workgroup_put(struct erofs_workgroup *grp)
{
        if (lockref_put_or_lock(&grp->lockref))
                return;

        DBG_BUGON(__lockref_is_dead(&grp->lockref));
        if (grp->lockref.count == 1)
                atomic_long_inc(&erofs_global_shrink_cnt);
        --grp->lockref.count;
        spin_unlock(&grp->lockref.lock);
}

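/*
 * Try to reclaim an idle (zero-reference) workgroup: detach its cached
 * pages, erase it from managed_pslots and mark its lockref dead, then
 * free it once the lockref lock is dropped. Called with the
 * managed_pslots xa_lock held.
 */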
static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
                                           struct erofs_workgroup *grp)
{
        bool free = false;

        spin_lock(&grp->lockref.lock);
        if (grp->lockref.count)
                goto out;

        /*
         * Note that all cached pages should be detached before the workgroup
         * is deleted from the XArray. Otherwise, some cached pages could
         * still be attached to the orphaned old workgroup when the new one
         * is available in the tree.
         */
        if (erofs_try_to_free_all_cached_pages(sbi, grp))
                goto out;

        /*
         * It's impossible to fail after the workgroup is frozen; however,
         * in order to catch unexpected race conditions, add a DBG_BUGON
         * to observe this in advance.
         */
        DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp);

        lockref_mark_dead(&grp->lockref);
        free = true;
out:
        spin_unlock(&grp->lockref.lock);
        if (free)
                __erofs_workgroup_free(grp);
        return free;
}

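/*
 * Walk the managed_pslots XArray and release up to nr_shrink idle
 * workgroups; the xa_lock is dropped briefly after each successful
 * release. Returns the number of workgroups actually freed.
 */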
static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
                                              unsigned long nr_shrink)
{
        struct erofs_workgroup *grp;
        unsigned int freed = 0;
        unsigned long index;

        xa_lock(&sbi->managed_pslots);
        xa_for_each(&sbi->managed_pslots, index, grp) {
                /* try to shrink each valid workgroup */
                if (!erofs_try_to_release_workgroup(sbi, grp))
                        continue;
                xa_unlock(&sbi->managed_pslots);

                ++freed;
                if (!--nr_shrink)
                        return freed;
                xa_lock(&sbi->managed_pslots);
        }
        xa_unlock(&sbi->managed_pslots);
        return freed;
}

/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);

void erofs_shrinker_register(struct super_block *sb)
{
        struct erofs_sb_info *sbi = EROFS_SB(sb);

        mutex_init(&sbi->umount_mutex);

        spin_lock(&erofs_sb_list_lock);
        list_add(&sbi->list, &erofs_sb_list);
        spin_unlock(&erofs_sb_list_lock);
}

void erofs_shrinker_unregister(struct super_block *sb)
{
        struct erofs_sb_info *const sbi = EROFS_SB(sb);

        mutex_lock(&sbi->umount_mutex);
        /* clean up all remaining workgroups in memory */
        erofs_shrink_workstation(sbi, ~0UL);

        spin_lock(&erofs_sb_list_lock);
        list_del(&sbi->list);
        spin_unlock(&erofs_sb_list_lock);
        mutex_unlock(&sbi->umount_mutex);
}

static unsigned long erofs_shrink_count(struct shrinker *shrink,
                                        struct shrink_control *sc)
{
        return atomic_long_read(&erofs_global_shrink_cnt);
}

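/*
 * Shrinker scan callback: walk every mounted EROFS superblock and shrink
 * its workstation, rotating each processed superblock to the tail of
 * erofs_sb_list for fairness. Superblocks currently unmounting
 * (umount_mutex held) are skipped.
 */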
static unsigned long erofs_shrink_scan(struct shrinker *shrink,
                                       struct shrink_control *sc)
{
        struct erofs_sb_info *sbi;
        struct list_head *p;

        unsigned long nr = sc->nr_to_scan;
        unsigned int run_no;
        unsigned long freed = 0;

        spin_lock(&erofs_sb_list_lock);
        do {
                run_no = ++shrinker_run_no;
        } while (run_no == 0);

        /* Iterate over all mounted superblocks and try to shrink them */
        p = erofs_sb_list.next;
        while (p != &erofs_sb_list) {
                sbi = list_entry(p, struct erofs_sb_info, list);

                /*
                 * We move the ones we do to the end of the list, so we stop
                 * when we see one we have already done.
                 */
                if (sbi->shrinker_run_no == run_no)
                        break;

                if (!mutex_trylock(&sbi->umount_mutex)) {
                        p = p->next;
                        continue;
                }

                spin_unlock(&erofs_sb_list_lock);
                sbi->shrinker_run_no = run_no;

                freed += erofs_shrink_workstation(sbi, nr - freed);

                spin_lock(&erofs_sb_list_lock);
                /* Get the next list element before we move this one */
                p = p->next;

                /*
                 * Move this one to the end of the list to provide some
                 * fairness.
                 */
                list_move_tail(&sbi->list, &erofs_sb_list);
                mutex_unlock(&sbi->umount_mutex);

                if (freed >= nr)
                        break;
        }
        spin_unlock(&erofs_sb_list_lock);
        return freed;
}

static struct shrinker erofs_shrinker_info = {
        .scan_objects = erofs_shrink_scan,
        .count_objects = erofs_shrink_count,
        .seeks = DEFAULT_SEEKS,
};

int __init erofs_init_shrinker(void)
{
        return register_shrinker(&erofs_shrinker_info, "erofs-shrinker");
}

void erofs_exit_shrinker(void)
{
        unregister_shrinker(&erofs_shrinker_info);
}
#endif  /* CONFIG_EROFS_FS_ZIP */