linux/fs/ubifs/shrinker.c
<<
>>
Prefs
   1/*
   2 * This file is part of UBIFS.
   3 *
   4 * Copyright (C) 2006-2008 Nokia Corporation.
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of the GNU General Public License version 2 as published by
   8 * the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful, but WITHOUT
  11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  13 * more details.
  14 *
  15 * You should have received a copy of the GNU General Public License along with
  16 * this program; if not, write to the Free Software Foundation, Inc., 51
  17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  18 *
  19 * Authors: Artem Bityutskiy (Битюцкий Артём)
  20 *          Adrian Hunter
  21 */
  22
  23/*
  24 * This file implements UBIFS shrinker which evicts clean znodes from the TNC
  25 * tree when Linux VM needs more RAM.
  26 *
  27 * We do not implement any LRU lists to find oldest znodes to free because it
  28 * would add additional overhead to the file system fast paths. So the shrinker
  29 * just walks the TNC tree when searching for znodes to free.
  30 *
  31 * If the root of a TNC sub-tree is clean and old enough, then the children are
  32 * also clean and old enough. So the shrinker walks the TNC in level order and
  33 * dumps entire sub-trees.
  34 *
  35 * The age of znodes is just the time-stamp when they were last looked at.
  36 * The current shrinker first tries to evict old znodes, then young ones.
  37 *
  38 * Since the shrinker is global, it has to protect against races with FS
  39 * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'.
  40 */
  41
  42#include "ubifs.h"
  43
  44/* List of all UBIFS file-system instances */
  45LIST_HEAD(ubifs_infos);
  46
  47/*
  48 * We number each shrinker run and record the number on the ubifs_info structure
  49 * so that we can easily work out which ubifs_info structures have already been
  50 * done by the current run.
  51 */
  52static unsigned int shrinker_run_no;
  53
  54/* Protects 'ubifs_infos' list */
  55DEFINE_SPINLOCK(ubifs_infos_lock);
  56
  57/* Global clean znode counter (for all mounted UBIFS instances) */
  58atomic_long_t ubifs_clean_zn_cnt;
  59
  60/**
  61 * shrink_tnc - shrink TNC tree.
  62 * @c: UBIFS file-system description object
  63 * @nr: number of znodes to free
  64 * @age: the age of znodes to free
  65 * @contention: if any contention, this is set to %1
  66 *
  67 * This function traverses TNC tree and frees clean znodes. It does not free
  68 * clean znodes which younger then @age. Returns number of freed znodes.
  69 */
  70static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
  71{
  72        int total_freed = 0;
  73        struct ubifs_znode *znode, *zprev;
  74        int time = get_seconds();
  75
  76        ubifs_assert(mutex_is_locked(&c->umount_mutex));
  77        ubifs_assert(mutex_is_locked(&c->tnc_mutex));
  78
  79        if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0)
  80                return 0;
  81
  82        /*
  83         * Traverse the TNC tree in levelorder manner, so that it is possible
  84         * to destroy large sub-trees. Indeed, if a znode is old, then all its
  85         * children are older or of the same age.
  86         *
  87         * Note, we are holding 'c->tnc_mutex', so we do not have to lock the
  88         * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is
  89         * changed only when the 'c->tnc_mutex' is held.
  90         */
  91        zprev = NULL;
  92        znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
  93        while (znode && total_freed < nr &&
  94               atomic_long_read(&c->clean_zn_cnt) > 0) {
  95                int freed;
  96
  97                /*
  98                 * If the znode is clean, but it is in the 'c->cnext' list, this
  99                 * means that this znode has just been written to flash as a
 100                 * part of commit and was marked clean. They will be removed
 101                 * from the list at end commit. We cannot change the list,
 102                 * because it is not protected by any mutex (design decision to
 103                 * make commit really independent and parallel to main I/O). So
 104                 * we just skip these znodes.
 105                 *
 106                 * Note, the 'clean_zn_cnt' counters are not updated until
 107                 * after the commit, so the UBIFS shrinker does not report
 108                 * the znodes which are in the 'c->cnext' list as freeable.
 109                 *
 110                 * Also note, if the root of a sub-tree is not in 'c->cnext',
 111                 * then the whole sub-tree is not in 'c->cnext' as well, so it
 112                 * is safe to dump whole sub-tree.
 113                 */
 114
 115                if (znode->cnext) {
 116                        /*
 117                         * Very soon these znodes will be removed from the list
 118                         * and become freeable.
 119                         */
 120                        *contention = 1;
 121                } else if (!ubifs_zn_dirty(znode) &&
 122                           abs(time - znode->time) >= age) {
 123                        if (znode->parent)
 124                                znode->parent->zbranch[znode->iip].znode = NULL;
 125                        else
 126                                c->zroot.znode = NULL;
 127
 128                        freed = ubifs_destroy_tnc_subtree(znode);
 129                        atomic_long_sub(freed, &ubifs_clean_zn_cnt);
 130                        atomic_long_sub(freed, &c->clean_zn_cnt);
 131                        ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0);
 132                        total_freed += freed;
 133                        znode = zprev;
 134                }
 135
 136                if (unlikely(!c->zroot.znode))
 137                        break;
 138
 139                zprev = znode;
 140                znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
 141                cond_resched();
 142        }
 143
 144        return total_freed;
 145}
 146
 147/**
 148 * shrink_tnc_trees - shrink UBIFS TNC trees.
 149 * @nr: number of znodes to free
 150 * @age: the age of znodes to free
 151 * @contention: if any contention, this is set to %1
 152 *
 153 * This function walks the list of mounted UBIFS file-systems and frees clean
 154 * znodes which are older than @age, until at least @nr znodes are freed.
 155 * Returns the number of freed znodes.
 156 */
 157static int shrink_tnc_trees(int nr, int age, int *contention)
 158{
 159        struct ubifs_info *c;
 160        struct list_head *p;
 161        unsigned int run_no;
 162        int freed = 0;
 163
 164        spin_lock(&ubifs_infos_lock);
 165        do {
 166                run_no = ++shrinker_run_no;
 167        } while (run_no == 0);
 168        /* Iterate over all mounted UBIFS file-systems and try to shrink them */
 169        p = ubifs_infos.next;
 170        while (p != &ubifs_infos) {
 171                c = list_entry(p, struct ubifs_info, infos_list);
 172                /*
 173                 * We move the ones we do to the end of the list, so we stop
 174                 * when we see one we have already done.
 175                 */
 176                if (c->shrinker_run_no == run_no)
 177                        break;
 178                if (!mutex_trylock(&c->umount_mutex)) {
 179                        /* Some un-mount is in progress, try next FS */
 180                        *contention = 1;
 181                        p = p->next;
 182                        continue;
 183                }
 184                /*
 185                 * We're holding 'c->umount_mutex', so the file-system won't go
 186                 * away.
 187                 */
 188                if (!mutex_trylock(&c->tnc_mutex)) {
 189                        mutex_unlock(&c->umount_mutex);
 190                        *contention = 1;
 191                        p = p->next;
 192                        continue;
 193                }
 194                spin_unlock(&ubifs_infos_lock);
 195                /*
 196                 * OK, now we have TNC locked, the file-system cannot go away -
 197                 * it is safe to reap the cache.
 198                 */
 199                c->shrinker_run_no = run_no;
 200                freed += shrink_tnc(c, nr, age, contention);
 201                mutex_unlock(&c->tnc_mutex);
 202                spin_lock(&ubifs_infos_lock);
 203                /* Get the next list element before we move this one */
 204                p = p->next;
 205                /*
 206                 * Move this one to the end of the list to provide some
 207                 * fairness.
 208                 */
 209                list_move_tail(&c->infos_list, &ubifs_infos);
 210                mutex_unlock(&c->umount_mutex);
 211                if (freed >= nr)
 212                        break;
 213        }
 214        spin_unlock(&ubifs_infos_lock);
 215        return freed;
 216}
 217
 218/**
 219 * kick_a_thread - kick a background thread to start commit.
 220 *
 221 * This function kicks a background thread to start background commit. Returns
 222 * %-1 if a thread was kicked or there is another reason to assume the memory
 223 * will soon be freed or become freeable. If there are no dirty znodes, returns
 224 * %0.
 225 */
 226static int kick_a_thread(void)
 227{
 228        int i;
 229        struct ubifs_info *c;
 230
 231        /*
 232         * Iterate over all mounted UBIFS file-systems and find out if there is
 233         * already an ongoing commit operation there. If no, then iterate for
 234         * the second time and initiate background commit.
 235         */
 236        spin_lock(&ubifs_infos_lock);
 237        for (i = 0; i < 2; i++) {
 238                list_for_each_entry(c, &ubifs_infos, infos_list) {
 239                        long dirty_zn_cnt;
 240
 241                        if (!mutex_trylock(&c->umount_mutex)) {
 242                                /*
 243                                 * Some un-mount is in progress, it will
 244                                 * certainly free memory, so just return.
 245                                 */
 246                                spin_unlock(&ubifs_infos_lock);
 247                                return -1;
 248                        }
 249
 250                        dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
 251
 252                        if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
 253                            c->ro_media) {
 254                                mutex_unlock(&c->umount_mutex);
 255                                continue;
 256                        }
 257
 258                        if (c->cmt_state != COMMIT_RESTING) {
 259                                spin_unlock(&ubifs_infos_lock);
 260                                mutex_unlock(&c->umount_mutex);
 261                                return -1;
 262                        }
 263
 264                        if (i == 1) {
 265                                list_move_tail(&c->infos_list, &ubifs_infos);
 266                                spin_unlock(&ubifs_infos_lock);
 267
 268                                ubifs_request_bg_commit(c);
 269                                mutex_unlock(&c->umount_mutex);
 270                                return -1;
 271                        }
 272                        mutex_unlock(&c->umount_mutex);
 273                }
 274        }
 275        spin_unlock(&ubifs_infos_lock);
 276
 277        return 0;
 278}
 279
 280int ubifs_shrinker(int nr, gfp_t gfp_mask)
 281{
 282        int freed, contention = 0;
 283        long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
 284
 285        if (nr == 0)
 286                return clean_zn_cnt;
 287
 288        if (!clean_zn_cnt) {
 289                /*
 290                 * No clean znodes, nothing to reap. All we can do in this case
 291                 * is to kick background threads to start commit, which will
 292                 * probably make clean znodes which, in turn, will be freeable.
 293                 * And we return -1 which means will make VM call us again
 294                 * later.
 295                 */
 296                dbg_tnc("no clean znodes, kick a thread");
 297                return kick_a_thread();
 298        }
 299
 300        freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention);
 301        if (freed >= nr)
 302                goto out;
 303
 304        dbg_tnc("not enough old znodes, try to free young ones");
 305        freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention);
 306        if (freed >= nr)
 307                goto out;
 308
 309        dbg_tnc("not enough young znodes, free all");
 310        freed += shrink_tnc_trees(nr - freed, 0, &contention);
 311
 312        if (!freed && contention) {
 313                dbg_tnc("freed nothing, but contention");
 314                return -1;
 315        }
 316
 317out:
 318        dbg_tnc("%d znodes were freed, requested %d", freed, nr);
 319        return freed;
 320}
 321