linux/fs/ubifs/shrinker.c
<<
>>
Prefs
   1/*
   2 * This file is part of UBIFS.
   3 *
   4 * Copyright (C) 2006-2008 Nokia Corporation.
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of the GNU General Public License version 2 as published by
   8 * the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful, but WITHOUT
  11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  13 * more details.
  14 *
  15 * You should have received a copy of the GNU General Public License along with
  16 * this program; if not, write to the Free Software Foundation, Inc., 51
  17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  18 *
  19 * Authors: Artem Bityutskiy (Битюцкий Артём)
  20 *          Adrian Hunter
  21 */
  22
  23/*
  24 * This file implements UBIFS shrinker which evicts clean znodes from the TNC
  25 * tree when Linux VM needs more RAM.
  26 *
  27 * We do not implement any LRU lists to find oldest znodes to free because it
  28 * would add additional overhead to the file system fast paths. So the shrinker
  29 * just walks the TNC tree when searching for znodes to free.
  30 *
  31 * If the root of a TNC sub-tree is clean and old enough, then the children are
  32 * also clean and old enough. So the shrinker walks the TNC in level order and
  33 * dumps entire sub-trees.
  34 *
  35 * The age of znodes is just the time-stamp when they were last looked at.
  36 * The current shrinker first tries to evict old znodes, then young ones.
  37 *
  38 * Since the shrinker is global, it has to protect against races with FS
  39 * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'.
  40 */
  41
  42#include "ubifs.h"
  43
  44/* List of all UBIFS file-system instances */
  45LIST_HEAD(ubifs_infos);
  46
  47/*
  48 * We number each shrinker run and record the number on the ubifs_info structure
  49 * so that we can easily work out which ubifs_info structures have already been
  50 * done by the current run.
  51 */
  52static unsigned int shrinker_run_no;
  53
  54/* Protects 'ubifs_infos' list */
  55DEFINE_SPINLOCK(ubifs_infos_lock);
  56
  57/* Global clean znode counter (for all mounted UBIFS instances) */
  58atomic_long_t ubifs_clean_zn_cnt;
  59
  60/**
  61 * shrink_tnc - shrink TNC tree.
  62 * @c: UBIFS file-system description object
  63 * @nr: number of znodes to free
  64 * @age: the age of znodes to free
  65 * @contention: if any contention, this is set to %1
  66 *
  67 * This function traverses TNC tree and frees clean znodes. It does not free
  68 * clean znodes which younger then @age. Returns number of freed znodes.
  69 */
  70static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
  71{
  72        int total_freed = 0;
  73        struct ubifs_znode *znode, *zprev;
  74        int time = get_seconds();
  75
  76        ubifs_assert(mutex_is_locked(&c->umount_mutex));
  77        ubifs_assert(mutex_is_locked(&c->tnc_mutex));
  78
  79        if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0)
  80                return 0;
  81
  82        /*
  83         * Traverse the TNC tree in levelorder manner, so that it is possible
  84         * to destroy large sub-trees. Indeed, if a znode is old, then all its
  85         * children are older or of the same age.
  86         *
  87         * Note, we are holding 'c->tnc_mutex', so we do not have to lock the
  88         * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is
  89         * changed only when the 'c->tnc_mutex' is held.
  90         */
  91        zprev = NULL;
  92        znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
  93        while (znode && total_freed < nr &&
  94               atomic_long_read(&c->clean_zn_cnt) > 0) {
  95                int freed;
  96
  97                /*
  98                 * If the znode is clean, but it is in the 'c->cnext' list, this
  99                 * means that this znode has just been written to flash as a
 100                 * part of commit and was marked clean. They will be removed
 101                 * from the list at end commit. We cannot change the list,
 102                 * because it is not protected by any mutex (design decision to
 103                 * make commit really independent and parallel to main I/O). So
 104                 * we just skip these znodes.
 105                 *
 106                 * Note, the 'clean_zn_cnt' counters are not updated until
 107                 * after the commit, so the UBIFS shrinker does not report
 108                 * the znodes which are in the 'c->cnext' list as freeable.
 109                 *
 110                 * Also note, if the root of a sub-tree is not in 'c->cnext',
 111                 * then the whole sub-tree is not in 'c->cnext' as well, so it
 112                 * is safe to dump whole sub-tree.
 113                 */
 114
 115                if (znode->cnext) {
 116                        /*
 117                         * Very soon these znodes will be removed from the list
 118                         * and become freeable.
 119                         */
 120                        *contention = 1;
 121                } else if (!ubifs_zn_dirty(znode) &&
 122                           abs(time - znode->time) >= age) {
 123                        if (znode->parent)
 124                                znode->parent->zbranch[znode->iip].znode = NULL;
 125                        else
 126                                c->zroot.znode = NULL;
 127
 128                        freed = ubifs_destroy_tnc_subtree(znode);
 129                        atomic_long_sub(freed, &ubifs_clean_zn_cnt);
 130                        atomic_long_sub(freed, &c->clean_zn_cnt);
 131                        total_freed += freed;
 132                        znode = zprev;
 133                }
 134
 135                if (unlikely(!c->zroot.znode))
 136                        break;
 137
 138                zprev = znode;
 139                znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
 140                cond_resched();
 141        }
 142
 143        return total_freed;
 144}
 145
 146/**
 147 * shrink_tnc_trees - shrink UBIFS TNC trees.
 148 * @nr: number of znodes to free
 149 * @age: the age of znodes to free
 150 * @contention: if any contention, this is set to %1
 151 *
 152 * This function walks the list of mounted UBIFS file-systems and frees clean
 153 * znodes which are older than @age, until at least @nr znodes are freed.
 154 * Returns the number of freed znodes.
 155 */
 156static int shrink_tnc_trees(int nr, int age, int *contention)
 157{
 158        struct ubifs_info *c;
 159        struct list_head *p;
 160        unsigned int run_no;
 161        int freed = 0;
 162
 163        spin_lock(&ubifs_infos_lock);
 164        do {
 165                run_no = ++shrinker_run_no;
 166        } while (run_no == 0);
 167        /* Iterate over all mounted UBIFS file-systems and try to shrink them */
 168        p = ubifs_infos.next;
 169        while (p != &ubifs_infos) {
 170                c = list_entry(p, struct ubifs_info, infos_list);
 171                /*
 172                 * We move the ones we do to the end of the list, so we stop
 173                 * when we see one we have already done.
 174                 */
 175                if (c->shrinker_run_no == run_no)
 176                        break;
 177                if (!mutex_trylock(&c->umount_mutex)) {
 178                        /* Some un-mount is in progress, try next FS */
 179                        *contention = 1;
 180                        p = p->next;
 181                        continue;
 182                }
 183                /*
 184                 * We're holding 'c->umount_mutex', so the file-system won't go
 185                 * away.
 186                 */
 187                if (!mutex_trylock(&c->tnc_mutex)) {
 188                        mutex_unlock(&c->umount_mutex);
 189                        *contention = 1;
 190                        p = p->next;
 191                        continue;
 192                }
 193                spin_unlock(&ubifs_infos_lock);
 194                /*
 195                 * OK, now we have TNC locked, the file-system cannot go away -
 196                 * it is safe to reap the cache.
 197                 */
 198                c->shrinker_run_no = run_no;
 199                freed += shrink_tnc(c, nr, age, contention);
 200                mutex_unlock(&c->tnc_mutex);
 201                spin_lock(&ubifs_infos_lock);
 202                /* Get the next list element before we move this one */
 203                p = p->next;
 204                /*
 205                 * Move this one to the end of the list to provide some
 206                 * fairness.
 207                 */
 208                list_move_tail(&c->infos_list, &ubifs_infos);
 209                mutex_unlock(&c->umount_mutex);
 210                if (freed >= nr)
 211                        break;
 212        }
 213        spin_unlock(&ubifs_infos_lock);
 214        return freed;
 215}
 216
 217/**
 218 * kick_a_thread - kick a background thread to start commit.
 219 *
 220 * This function kicks a background thread to start background commit. Returns
 221 * %-1 if a thread was kicked or there is another reason to assume the memory
 222 * will soon be freed or become freeable. If there are no dirty znodes, returns
 223 * %0.
 224 */
 225static int kick_a_thread(void)
 226{
 227        int i;
 228        struct ubifs_info *c;
 229
 230        /*
 231         * Iterate over all mounted UBIFS file-systems and find out if there is
 232         * already an ongoing commit operation there. If no, then iterate for
 233         * the second time and initiate background commit.
 234         */
 235        spin_lock(&ubifs_infos_lock);
 236        for (i = 0; i < 2; i++) {
 237                list_for_each_entry(c, &ubifs_infos, infos_list) {
 238                        long dirty_zn_cnt;
 239
 240                        if (!mutex_trylock(&c->umount_mutex)) {
 241                                /*
 242                                 * Some un-mount is in progress, it will
 243                                 * certainly free memory, so just return.
 244                                 */
 245                                spin_unlock(&ubifs_infos_lock);
 246                                return -1;
 247                        }
 248
 249                        dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
 250
 251                        if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
 252                            c->ro_mount || c->ro_error) {
 253                                mutex_unlock(&c->umount_mutex);
 254                                continue;
 255                        }
 256
 257                        if (c->cmt_state != COMMIT_RESTING) {
 258                                spin_unlock(&ubifs_infos_lock);
 259                                mutex_unlock(&c->umount_mutex);
 260                                return -1;
 261                        }
 262
 263                        if (i == 1) {
 264                                list_move_tail(&c->infos_list, &ubifs_infos);
 265                                spin_unlock(&ubifs_infos_lock);
 266
 267                                ubifs_request_bg_commit(c);
 268                                mutex_unlock(&c->umount_mutex);
 269                                return -1;
 270                        }
 271                        mutex_unlock(&c->umount_mutex);
 272                }
 273        }
 274        spin_unlock(&ubifs_infos_lock);
 275
 276        return 0;
 277}
 278
 279unsigned long ubifs_shrink_count(struct shrinker *shrink,
 280                                 struct shrink_control *sc)
 281{
 282        long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
 283
 284        /*
 285         * Due to the way UBIFS updates the clean znode counter it may
 286         * temporarily be negative.
 287         */
 288        return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
 289}
 290
 291unsigned long ubifs_shrink_scan(struct shrinker *shrink,
 292                                struct shrink_control *sc)
 293{
 294        unsigned long nr = sc->nr_to_scan;
 295        int contention = 0;
 296        unsigned long freed;
 297        long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
 298
 299        if (!clean_zn_cnt) {
 300                /*
 301                 * No clean znodes, nothing to reap. All we can do in this case
 302                 * is to kick background threads to start commit, which will
 303                 * probably make clean znodes which, in turn, will be freeable.
 304                 * And we return -1 which means will make VM call us again
 305                 * later.
 306                 */
 307                dbg_tnc("no clean znodes, kick a thread");
 308                return kick_a_thread();
 309        }
 310
 311        freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention);
 312        if (freed >= nr)
 313                goto out;
 314
 315        dbg_tnc("not enough old znodes, try to free young ones");
 316        freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention);
 317        if (freed >= nr)
 318                goto out;
 319
 320        dbg_tnc("not enough young znodes, free all");
 321        freed += shrink_tnc_trees(nr - freed, 0, &contention);
 322
 323        if (!freed && contention) {
 324                dbg_tnc("freed nothing, but contention");
 325                return SHRINK_STOP;
 326        }
 327
 328out:
 329        dbg_tnc("%lu znodes were freed, requested %lu", freed, nr);
 330        return freed;
 331}
 332