linux/fs/erofs/utils.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             http://www.huawei.com/
 * Created by Gao Xiang <gaoxiang25@huawei.com>
 */
#include "internal.h"
#include <linux/pagevec.h>

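/*
 * Grab a page from the caller-supplied pool if one is available;
 * otherwise fall back to alloc_pages() (with __GFP_NOFAIL if @nofail).
 */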
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp, bool nofail)
{
        struct page *page;

        if (!list_empty(pool)) {
                page = lru_to_page(pool);
                DBG_BUGON(page_ref_count(page) != 1);
                list_del(&page->lru);
        } else {
                page = alloc_pages(gfp | (nofail ? __GFP_NOFAIL : 0), 0);
        }
        return page;
}

#if (EROFS_PCPUBUF_NR_PAGES > 0)
static struct {
        u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES];
} ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS];

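/*
 * Return the requested page of this CPU's static per-CPU buffer.
 * Preemption is disabled here and stays disabled until the caller
 * releases the buffer.
 */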
void *erofs_get_pcpubuf(unsigned int pagenr)
{
        preempt_disable();
        return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE];
}
#endif

#ifdef CONFIG_EROFS_FS_ZIP
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

#define __erofs_workgroup_get(grp)      atomic_inc(&(grp)->refcount)
#define __erofs_workgroup_put(grp)      atomic_dec(&(grp)->refcount)

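/*
 * Take an extra reference on a workgroup: wait while it is frozen and
 * retry the cmpxchg if the refcount changed underneath us.  Returns 0
 * on success or -1 if the workgroup is already on its way to be freed.
 */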
static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
        int o;

repeat:
        o = erofs_wait_on_workgroup_freezed(grp);
        if (o <= 0)
                return -1;

        if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
                goto repeat;

        /* decrease the global shrink count, paired with erofs_workgroup_put */
        if (o == 1)
                atomic_long_dec(&erofs_global_shrink_cnt);
        return 0;
}

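/*
 * Look up a workgroup by index in the per-superblock radix tree under
 * RCU and take a reference on it; retry the lookup if the workgroup is
 * concurrently being freed.
 */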
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
                                             pgoff_t index, bool *tag)
{
        struct erofs_sb_info *sbi = EROFS_SB(sb);
        struct erofs_workgroup *grp;

repeat:
        rcu_read_lock();
        grp = radix_tree_lookup(&sbi->workstn_tree, index);
        if (grp) {
                *tag = xa_pointer_tag(grp);
                grp = xa_untag_pointer(grp);

                if (erofs_workgroup_get(grp)) {
                        /* prefer to relax rcu read side */
                        rcu_read_unlock();
                        goto repeat;
                }

                DBG_BUGON(index != grp->index);
        }
        rcu_read_unlock();
        return grp;
}

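/*
 * Insert a newly created workgroup (refcount must be exactly 1) into
 * the per-superblock radix tree so that it becomes visible to others.
 */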
int erofs_register_workgroup(struct super_block *sb,
                             struct erofs_workgroup *grp,
                             bool tag)
{
        struct erofs_sb_info *sbi;
        int err;

        /* grp shouldn't be broken or used before */
        if (atomic_read(&grp->refcount) != 1) {
                DBG_BUGON(1);
                return -EINVAL;
        }

        err = radix_tree_preload(GFP_NOFS);
        if (err)
                return err;

        sbi = EROFS_SB(sb);
        xa_lock(&sbi->workstn_tree);

        grp = xa_tag_pointer(grp, tag);

        /*
         * Bump up the reference count before making this workgroup
         * visible to other users in order to avoid a potential UAF
         * when the access isn't serialized by workstn_lock.
         */
        __erofs_workgroup_get(grp);

        err = radix_tree_insert(&sbi->workstn_tree, grp->index, grp);
        if (err)
                /*
                 * It's safe to decrease here since the workgroup isn't
                 * visible yet and its refcount >= 2 (it cannot be frozen).
                 */
                __erofs_workgroup_put(grp);

        xa_unlock(&sbi->workstn_tree);
        radix_tree_preload_end();
        return err;
}

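/*
 * Drop the global shrink count and free the workgroup (the actual
 * freeing is deferred via erofs_workgroup_free_rcu()).
 */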
static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
        atomic_long_dec(&erofs_global_shrink_cnt);
        erofs_workgroup_free_rcu(grp);
}

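/*
 * Drop a reference.  When only the radix tree reference remains
 * (refcount drops to 1), the workgroup becomes reclaimable, so bump
 * the global shrink count; when the refcount hits zero, free it.
 */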
int erofs_workgroup_put(struct erofs_workgroup *grp)
{
        int count = atomic_dec_return(&grp->refcount);

        if (count == 1)
                atomic_long_inc(&erofs_global_shrink_cnt);
        else if (!count)
                __erofs_workgroup_free(grp);
        return count;
}

static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
{
        erofs_workgroup_unfreeze(grp, 0);
        __erofs_workgroup_free(grp);
}

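/*
 * Try to reclaim a single workgroup: freeze it, release all of its
 * cached pages and then remove it from the radix tree.  Returns false
 * if the workgroup or its cached pages are still busy.
 */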
static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
                                           struct erofs_workgroup *grp,
                                           bool cleanup)
{
        /*
         * If managed cache is on, the refcount of workgroups
         * themselves could be < 0 (frozen). In other words,
         * there is no guarantee that all refcounts are > 0.
         */
        if (!erofs_workgroup_try_to_freeze(grp, 1))
                return false;

        /*
         * Note that all cached pages should be unattached
         * before the workgroup is deleted from the radix tree.
         * Otherwise some cached pages could still be attached to
         * the orphaned old workgroup when the new one is available
         * in the tree.
         */
        if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
                erofs_workgroup_unfreeze(grp, 1);
                return false;
        }

        /*
         * It's impossible to fail after the workgroup is frozen,
         * but add a DBG_BUGON anyway to catch unexpected race
         * conditions early.
         */
        DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
                                                     grp->index)) != grp);

        /*
         * If managed cache is on, the last refcount should indicate
         * the related workstation.
         */
        erofs_workgroup_unfreeze_final(grp);
        return true;
}

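/*
 * Walk the workstation radix tree in PAGEVEC_SIZE-sized batches and
 * try to release up to @nr_shrink workgroups; returns how many were
 * actually freed.
 */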
static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
                                              unsigned long nr_shrink,
                                              bool cleanup)
{
        pgoff_t first_index = 0;
        void *batch[PAGEVEC_SIZE];
        unsigned int freed = 0;

        int i, found;
repeat:
        xa_lock(&sbi->workstn_tree);

        found = radix_tree_gang_lookup(&sbi->workstn_tree,
                                       batch, first_index, PAGEVEC_SIZE);

        for (i = 0; i < found; ++i) {
                struct erofs_workgroup *grp = xa_untag_pointer(batch[i]);

                first_index = grp->index + 1;

                /* try to shrink each valid workgroup */
                if (!erofs_try_to_release_workgroup(sbi, grp, cleanup))
                        continue;

                ++freed;
                if (!--nr_shrink)
                        break;
        }
        xa_unlock(&sbi->workstn_tree);

        if (i && nr_shrink)
                goto repeat;
        return freed;
}

/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);

void erofs_shrinker_register(struct super_block *sb)
{
        struct erofs_sb_info *sbi = EROFS_SB(sb);

        mutex_init(&sbi->umount_mutex);

        spin_lock(&erofs_sb_list_lock);
        list_add(&sbi->list, &erofs_sb_list);
        spin_unlock(&erofs_sb_list_lock);
}

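/*
 * Called on umount: forcibly reclaim all remaining workgroups of this
 * superblock before taking it off the global shrink list.
 */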
void erofs_shrinker_unregister(struct super_block *sb)
{
        struct erofs_sb_info *const sbi = EROFS_SB(sb);

        mutex_lock(&sbi->umount_mutex);
        erofs_shrink_workstation(sbi, ~0UL, true);

        spin_lock(&erofs_sb_list_lock);
        list_del(&sbi->list);
        spin_unlock(&erofs_sb_list_lock);
        mutex_unlock(&sbi->umount_mutex);
}

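/* report the total number of reclaimable workgroups across all mounts */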
static unsigned long erofs_shrink_count(struct shrinker *shrink,
                                        struct shrink_control *sc)
{
        return atomic_long_read(&erofs_global_shrink_cnt);
}

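/*
 * Walk the mounted superblocks round-robin (skipping any that are
 * busy unmounting) and shrink their workstations until sc->nr_to_scan
 * workgroups have been freed.
 */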
static unsigned long erofs_shrink_scan(struct shrinker *shrink,
                                       struct shrink_control *sc)
{
        struct erofs_sb_info *sbi;
        struct list_head *p;

        unsigned long nr = sc->nr_to_scan;
        unsigned int run_no;
        unsigned long freed = 0;

        spin_lock(&erofs_sb_list_lock);
        do {
                run_no = ++shrinker_run_no;
        } while (run_no == 0);

        /* Iterate over all mounted superblocks and try to shrink them */
        p = erofs_sb_list.next;
        while (p != &erofs_sb_list) {
                sbi = list_entry(p, struct erofs_sb_info, list);

                /*
                 * We move the ones we do to the end of the list, so we stop
                 * when we see one we have already done.
                 */
                if (sbi->shrinker_run_no == run_no)
                        break;

                if (!mutex_trylock(&sbi->umount_mutex)) {
                        p = p->next;
                        continue;
                }

                spin_unlock(&erofs_sb_list_lock);
                sbi->shrinker_run_no = run_no;

                freed += erofs_shrink_workstation(sbi, nr, false);

                spin_lock(&erofs_sb_list_lock);
                /* Get the next list element before we move this one */
                p = p->next;

                /*
                 * Move this one to the end of the list to provide some
                 * fairness.
                 */
                list_move_tail(&sbi->list, &erofs_sb_list);
                mutex_unlock(&sbi->umount_mutex);

                if (freed >= nr)
                        break;
        }
        spin_unlock(&erofs_sb_list_lock);
        return freed;
}

static struct shrinker erofs_shrinker_info = {
        .scan_objects = erofs_shrink_scan,
        .count_objects = erofs_shrink_count,
        .seeks = DEFAULT_SEEKS,
};

int __init erofs_init_shrinker(void)
{
        return register_shrinker(&erofs_shrinker_info);
}

void erofs_exit_shrinker(void)
{
        unregister_shrinker(&erofs_shrinker_info);
}
#endif  /* !CONFIG_EROFS_FS_ZIP */