linux/fs/ubifs/shrinker.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * This file is part of UBIFS.
   4 *
   5 * Copyright (C) 2006-2008 Nokia Corporation.
   6 *
   7 * Authors: Artem Bityutskiy (Битюцкий Артём)
   8 *          Adrian Hunter
   9 */
  10
/*
 * This file implements the UBIFS shrinker, which evicts clean znodes from the
 * TNC tree when the Linux VM needs more RAM.
 *
 * We do not implement any LRU lists to find the oldest znodes to free because
 * it would add additional overhead to the file system fast paths. So the
 * shrinker just walks the TNC tree when searching for znodes to free.
 *
 * If the root of a TNC sub-tree is clean and old enough, then the children are
 * also clean and old enough. So the shrinker walks the TNC in level order and
 * dumps entire sub-trees.
 *
 * The age of a znode is just the time-stamp of when it was last looked at.
 * The current shrinker first tries to evict old znodes, then young ones.
 *
 * Since the shrinker is global, it has to protect against races with FS
 * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'.
 */
  29
  30#include "ubifs.h"
  31
  32/* List of all UBIFS file-system instances */
  33LIST_HEAD(ubifs_infos);
  34
  35/*
  36 * We number each shrinker run and record the number on the ubifs_info structure
  37 * so that we can easily work out which ubifs_info structures have already been
  38 * done by the current run.
  39 */
  40static unsigned int shrinker_run_no;
  41
  42/* Protects 'ubifs_infos' list */
  43DEFINE_SPINLOCK(ubifs_infos_lock);
  44
  45/* Global clean znode counter (for all mounted UBIFS instances) */
  46atomic_long_t ubifs_clean_zn_cnt;
  47
  48/**
  49 * shrink_tnc - shrink TNC tree.
  50 * @c: UBIFS file-system description object
  51 * @nr: number of znodes to free
  52 * @age: the age of znodes to free
  53 * @contention: if any contention, this is set to %1
  54 *
  55 * This function traverses TNC tree and frees clean znodes. It does not free
  56 * clean znodes which younger then @age. Returns number of freed znodes.
  57 */
  58static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
  59{
  60        int total_freed = 0;
  61        struct ubifs_znode *znode, *zprev;
  62        time64_t time = ktime_get_seconds();
  63
  64        ubifs_assert(c, mutex_is_locked(&c->umount_mutex));
  65        ubifs_assert(c, mutex_is_locked(&c->tnc_mutex));
  66
  67        if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0)
  68                return 0;
  69
  70        /*
  71         * Traverse the TNC tree in levelorder manner, so that it is possible
  72         * to destroy large sub-trees. Indeed, if a znode is old, then all its
  73         * children are older or of the same age.
  74         *
  75         * Note, we are holding 'c->tnc_mutex', so we do not have to lock the
  76         * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is
  77         * changed only when the 'c->tnc_mutex' is held.
  78         */
  79        zprev = NULL;
  80        znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL);
  81        while (znode && total_freed < nr &&
  82               atomic_long_read(&c->clean_zn_cnt) > 0) {
  83                int freed;
  84
  85                /*
  86                 * If the znode is clean, but it is in the 'c->cnext' list, this
  87                 * means that this znode has just been written to flash as a
  88                 * part of commit and was marked clean. They will be removed
  89                 * from the list at end commit. We cannot change the list,
  90                 * because it is not protected by any mutex (design decision to
  91                 * make commit really independent and parallel to main I/O). So
  92                 * we just skip these znodes.
  93                 *
  94                 * Note, the 'clean_zn_cnt' counters are not updated until
  95                 * after the commit, so the UBIFS shrinker does not report
  96                 * the znodes which are in the 'c->cnext' list as freeable.
  97                 *
  98                 * Also note, if the root of a sub-tree is not in 'c->cnext',
  99                 * then the whole sub-tree is not in 'c->cnext' as well, so it
 100                 * is safe to dump whole sub-tree.
 101                 */
 102
 103                if (znode->cnext) {
 104                        /*
 105                         * Very soon these znodes will be removed from the list
 106                         * and become freeable.
 107                         */
 108                        *contention = 1;
 109                } else if (!ubifs_zn_dirty(znode) &&
 110                           abs(time - znode->time) >= age) {
 111                        if (znode->parent)
 112                                znode->parent->zbranch[znode->iip].znode = NULL;
 113                        else
 114                                c->zroot.znode = NULL;
 115
 116                        freed = ubifs_destroy_tnc_subtree(c, znode);
 117                        atomic_long_sub(freed, &ubifs_clean_zn_cnt);
 118                        atomic_long_sub(freed, &c->clean_zn_cnt);
 119                        total_freed += freed;
 120                        znode = zprev;
 121                }
 122
 123                if (unlikely(!c->zroot.znode))
 124                        break;
 125
 126                zprev = znode;
 127                znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode);
 128                cond_resched();
 129        }
 130
 131        return total_freed;
 132}
 133
 134/**
 135 * shrink_tnc_trees - shrink UBIFS TNC trees.
 136 * @nr: number of znodes to free
 137 * @age: the age of znodes to free
 138 * @contention: if any contention, this is set to %1
 139 *
 140 * This function walks the list of mounted UBIFS file-systems and frees clean
 141 * znodes which are older than @age, until at least @nr znodes are freed.
 142 * Returns the number of freed znodes.
 143 */
 144static int shrink_tnc_trees(int nr, int age, int *contention)
 145{
 146        struct ubifs_info *c;
 147        struct list_head *p;
 148        unsigned int run_no;
 149        int freed = 0;
 150
 151        spin_lock(&ubifs_infos_lock);
 152        do {
 153                run_no = ++shrinker_run_no;
 154        } while (run_no == 0);
 155        /* Iterate over all mounted UBIFS file-systems and try to shrink them */
 156        p = ubifs_infos.next;
 157        while (p != &ubifs_infos) {
 158                c = list_entry(p, struct ubifs_info, infos_list);
 159                /*
 160                 * We move the ones we do to the end of the list, so we stop
 161                 * when we see one we have already done.
 162                 */
 163                if (c->shrinker_run_no == run_no)
 164                        break;
 165                if (!mutex_trylock(&c->umount_mutex)) {
 166                        /* Some un-mount is in progress, try next FS */
 167                        *contention = 1;
 168                        p = p->next;
 169                        continue;
 170                }
 171                /*
 172                 * We're holding 'c->umount_mutex', so the file-system won't go
 173                 * away.
 174                 */
 175                if (!mutex_trylock(&c->tnc_mutex)) {
 176                        mutex_unlock(&c->umount_mutex);
 177                        *contention = 1;
 178                        p = p->next;
 179                        continue;
 180                }
 181                spin_unlock(&ubifs_infos_lock);
 182                /*
 183                 * OK, now we have TNC locked, the file-system cannot go away -
 184                 * it is safe to reap the cache.
 185                 */
 186                c->shrinker_run_no = run_no;
 187                freed += shrink_tnc(c, nr, age, contention);
 188                mutex_unlock(&c->tnc_mutex);
 189                spin_lock(&ubifs_infos_lock);
 190                /* Get the next list element before we move this one */
 191                p = p->next;
 192                /*
 193                 * Move this one to the end of the list to provide some
 194                 * fairness.
 195                 */
 196                list_move_tail(&c->infos_list, &ubifs_infos);
 197                mutex_unlock(&c->umount_mutex);
 198                if (freed >= nr)
 199                        break;
 200        }
 201        spin_unlock(&ubifs_infos_lock);
 202        return freed;
 203}
 204
 205/**
 206 * kick_a_thread - kick a background thread to start commit.
 207 *
 208 * This function kicks a background thread to start background commit. Returns
 209 * %-1 if a thread was kicked or there is another reason to assume the memory
 210 * will soon be freed or become freeable. If there are no dirty znodes, returns
 211 * %0.
 212 */
 213static int kick_a_thread(void)
 214{
 215        int i;
 216        struct ubifs_info *c;
 217
 218        /*
 219         * Iterate over all mounted UBIFS file-systems and find out if there is
 220         * already an ongoing commit operation there. If no, then iterate for
 221         * the second time and initiate background commit.
 222         */
 223        spin_lock(&ubifs_infos_lock);
 224        for (i = 0; i < 2; i++) {
 225                list_for_each_entry(c, &ubifs_infos, infos_list) {
 226                        long dirty_zn_cnt;
 227
 228                        if (!mutex_trylock(&c->umount_mutex)) {
 229                                /*
 230                                 * Some un-mount is in progress, it will
 231                                 * certainly free memory, so just return.
 232                                 */
 233                                spin_unlock(&ubifs_infos_lock);
 234                                return -1;
 235                        }
 236
 237                        dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
 238
 239                        if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
 240                            c->ro_mount || c->ro_error) {
 241                                mutex_unlock(&c->umount_mutex);
 242                                continue;
 243                        }
 244
 245                        if (c->cmt_state != COMMIT_RESTING) {
 246                                spin_unlock(&ubifs_infos_lock);
 247                                mutex_unlock(&c->umount_mutex);
 248                                return -1;
 249                        }
 250
 251                        if (i == 1) {
 252                                list_move_tail(&c->infos_list, &ubifs_infos);
 253                                spin_unlock(&ubifs_infos_lock);
 254
 255                                ubifs_request_bg_commit(c);
 256                                mutex_unlock(&c->umount_mutex);
 257                                return -1;
 258                        }
 259                        mutex_unlock(&c->umount_mutex);
 260                }
 261        }
 262        spin_unlock(&ubifs_infos_lock);
 263
 264        return 0;
 265}
 266
 267unsigned long ubifs_shrink_count(struct shrinker *shrink,
 268                                 struct shrink_control *sc)
 269{
 270        long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
 271
 272        /*
 273         * Due to the way UBIFS updates the clean znode counter it may
 274         * temporarily be negative.
 275         */
 276        return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
 277}
 278
 279unsigned long ubifs_shrink_scan(struct shrinker *shrink,
 280                                struct shrink_control *sc)
 281{
 282        unsigned long nr = sc->nr_to_scan;
 283        int contention = 0;
 284        unsigned long freed;
 285        long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
 286
 287        if (!clean_zn_cnt) {
 288                /*
 289                 * No clean znodes, nothing to reap. All we can do in this case
 290                 * is to kick background threads to start commit, which will
 291                 * probably make clean znodes which, in turn, will be freeable.
 292                 * And we return -1 which means will make VM call us again
 293                 * later.
 294                 */
 295                dbg_tnc("no clean znodes, kick a thread");
 296                return kick_a_thread();
 297        }
 298
 299        freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention);
 300        if (freed >= nr)
 301                goto out;
 302
 303        dbg_tnc("not enough old znodes, try to free young ones");
 304        freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention);
 305        if (freed >= nr)
 306                goto out;
 307
 308        dbg_tnc("not enough young znodes, free all");
 309        freed += shrink_tnc_trees(nr - freed, 0, &contention);
 310
 311        if (!freed && contention) {
 312                dbg_tnc("freed nothing, but contention");
 313                return SHRINK_STOP;
 314        }
 315
 316out:
 317        dbg_tnc("%lu znodes were freed, requested %lu", freed, nr);
 318        return freed;
 319}
 320