linux/fs/logfs/gc.c
/*
 * fs/logfs/gc.c        - garbage collection code
 *
 * As should be obvious for Linux kernel code, license is GPLv2
 *
 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
 */
#include "logfs.h"
#include <linux/sched.h>
#include <linux/slab.h>

/*
 * Wear leveling needs to kick in when the difference between low erase
 * counts and high erase counts gets too big.  A good value for "too big"
 * may be somewhat below 10% of maximum erase count for the device.
 * Why not 397, to pick a nice round number with no specific meaning? :)
 *
 * WL_RATELIMIT is the minimum time between two wear level events.  A huge
 * number of segments may fulfil the requirements for wear leveling at the
 * same time.  If that happens we don't want to cause a latency from hell,
 * but just gently pick one segment every so often and minimize overhead.
 */
#define WL_DELTA 397
#define WL_RATELIMIT 100
#define MAX_OBJ_ALIASES 2600
#define SCAN_RATIO 512  /* number of scanned segments per gc'd segment */
#define LIST_SIZE 64    /* base size of candidate lists */
#define SCAN_ROUNDS 128 /* maximum number of complete medium scans */
#define SCAN_ROUNDS_HIGH 4 /* maximum number of higher-level scans */

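/*
 * Note that WL_RATELIMIT is measured in units of the global erase counter
 * (s_gec) rather than wall clock time; see wl_ratelimit() below.  WL_DELTA
 * is the erase count gap compared in logfs_wl_pass(), and
 * logfs_journal_wl_pass() demands twice that gap before moving the journal.
 */
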
static int no_free_segments(struct super_block *sb)
{
        struct logfs_super *super = logfs_super(sb);

        return super->s_free_list.count;
}

/* journal has distance -1, top-most ifile layer distance 0 */
static u8 root_distance(struct super_block *sb, gc_level_t __gc_level)
{
        struct logfs_super *super = logfs_super(sb);
        u8 gc_level = (__force u8)__gc_level;

        switch (gc_level) {
        case 0: /* fall through */
        case 1: /* fall through */
        case 2: /* fall through */
        case 3:
                /* file data or indirect blocks */
                return super->s_ifile_levels + super->s_iblock_levels - gc_level;
        case 6: /* fall through */
        case 7: /* fall through */
        case 8: /* fall through */
        case 9:
                /* inode file data or indirect blocks */
                return super->s_ifile_levels - (gc_level - 6);
        default:
                printk(KERN_ERR"LOGFS: segment of unknown level %x found\n",
                                gc_level);
                WARN_ON(1);
                return super->s_ifile_levels + super->s_iblock_levels;
        }
}
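
/*
 * Worked example with made-up level counts: if s_ifile_levels were 3 and
 * s_iblock_levels 4, file data (gc_level 0) would sit at distance 7 and its
 * highest indirect blocks (gc_level 3) at distance 4, while inode file data
 * (gc_level 6) would sit at distance 3.  Larger distances are farther away
 * from the root, consistent with the comment above root_distance().
 */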

static int segment_is_reserved(struct super_block *sb, u32 segno)
{
        struct logfs_super *super = logfs_super(sb);
        struct logfs_area *area;
        void *reserved;
        int i;

        /* Some segments are reserved.  Just pretend they were all valid */
        reserved = btree_lookup32(&super->s_reserved_segments, segno);
        if (reserved)
                return 1;

        /* Currently open segments */
        for_each_area(i) {
                area = super->s_area[i];
                if (area->a_is_open && area->a_segno == segno)
                        return 1;
        }

        return 0;
}

static void logfs_mark_segment_bad(struct super_block *sb, u32 segno)
{
        BUG();
}

/*
 * Returns the bytes consumed by valid objects in this segment.  Object headers
 * are counted, the segment header is not.
 */
static u32 logfs_valid_bytes(struct super_block *sb, u32 segno, u32 *ec,
                gc_level_t *gc_level)
{
        struct logfs_segment_entry se;
        u32 ec_level;

        logfs_get_segment_entry(sb, segno, &se);
        if (se.ec_level == cpu_to_be32(BADSEG) ||
                        se.valid == cpu_to_be32(RESERVED))
                return RESERVED;

        ec_level = be32_to_cpu(se.ec_level);
        *ec = ec_level >> 4;
        *gc_level = GC_LEVEL(ec_level & 0xf);
        return be32_to_cpu(se.valid);
}
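
/*
 * Not used by the code in this file; a minimal sketch of the inverse of the
 * decoding above.  The on-medium ec_level field packs the erase count into
 * the upper 28 bits and the gc level into the low 4 bits, so e.g. a value of
 * 0x1235 decodes to erase count 0x123 and gc level 5.  The helper name is
 * made up for illustration.
 */
static inline u32 example_pack_ec_level(u32 ec, u8 gc_level)
{
        return (ec << 4) | (gc_level & 0xf);
}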

static void logfs_cleanse_block(struct super_block *sb, u64 ofs, u64 ino,
                u64 bix, gc_level_t gc_level)
{
        struct inode *inode;
        int err, cookie;

        inode = logfs_safe_iget(sb, ino, &cookie);
        err = logfs_rewrite_block(inode, bix, ofs, gc_level, 0);
        BUG_ON(err);
        logfs_safe_iput(inode, cookie);
}

static u32 logfs_gc_segment(struct super_block *sb, u32 segno)
{
        struct logfs_super *super = logfs_super(sb);
        struct logfs_segment_header sh;
        struct logfs_object_header oh;
        u64 ofs, ino, bix;
        u32 seg_ofs, logical_segno, cleaned = 0;
        int err, len, valid;
        gc_level_t gc_level;

        LOGFS_BUG_ON(segment_is_reserved(sb, segno), sb);

        btree_insert32(&super->s_reserved_segments, segno, (void *)1, GFP_NOFS);
        err = wbuf_read(sb, dev_ofs(sb, segno, 0), sizeof(sh), &sh);
        BUG_ON(err);
        gc_level = GC_LEVEL(sh.level);
        logical_segno = be32_to_cpu(sh.segno);
        if (sh.crc != logfs_crc32(&sh, sizeof(sh), 4)) {
                logfs_mark_segment_bad(sb, segno);
                cleaned = -1;
                goto out;
        }

        for (seg_ofs = LOGFS_SEGMENT_HEADERSIZE;
                        seg_ofs + sizeof(oh) < super->s_segsize; ) {
                ofs = dev_ofs(sb, logical_segno, seg_ofs);
                err = wbuf_read(sb, dev_ofs(sb, segno, seg_ofs), sizeof(oh),
                                &oh);
                BUG_ON(err);

                if (!memchr_inv(&oh, 0xff, sizeof(oh)))
                        break;

                if (oh.crc != logfs_crc32(&oh, sizeof(oh) - 4, 4)) {
                        logfs_mark_segment_bad(sb, segno);
                        cleaned = super->s_segsize - 1;
                        goto out;
                }

                ino = be64_to_cpu(oh.ino);
                bix = be64_to_cpu(oh.bix);
                len = sizeof(oh) + be16_to_cpu(oh.len);
                valid = logfs_is_valid_block(sb, ofs, ino, bix, gc_level);
                if (valid == 1) {
                        logfs_cleanse_block(sb, ofs, ino, bix, gc_level);
                        cleaned += len;
                } else if (valid == 2) {
                        /* Will be invalid upon journal commit */
                        cleaned += len;
                }
                seg_ofs += len;
        }
out:
        btree_remove32(&super->s_reserved_segments, segno);
        return cleaned;
}

static struct gc_candidate *add_list(struct gc_candidate *cand,
                struct candidate_list *list)
{
        struct rb_node **p = &list->rb_tree.rb_node;
        struct rb_node *parent = NULL;
        struct gc_candidate *cur;
        int comp;

        cand->list = list;
        while (*p) {
                parent = *p;
                cur = rb_entry(parent, struct gc_candidate, rb_node);

                if (list->sort_by_ec)
                        comp = cand->erase_count < cur->erase_count;
                else
                        comp = cand->valid < cur->valid;

                if (comp)
                        p = &parent->rb_left;
                else
                        p = &parent->rb_right;
        }
        rb_link_node(&cand->rb_node, parent, p);
        rb_insert_color(&cand->rb_node, &list->rb_tree);

        if (list->count <= list->maxcount) {
                list->count++;
                return NULL;
        }
        cand = rb_entry(rb_last(&list->rb_tree), struct gc_candidate, rb_node);
        rb_erase(&cand->rb_node, &list->rb_tree);
        cand->list = NULL;
        return cand;
}
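
/*
 * Caller contract for add_list(), as exercised by __add_candidate() below:
 * it either keeps the candidate and returns NULL, or returns whichever
 * candidate lost its place in the bounded list (possibly the one just passed
 * in), which the caller must offer to another list or free, e.g.:
 *
 *         cand = add_list(cand, &super->s_free_list);
 *         if (cand)
 *                 free_candidate(sb, cand);
 */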

static void remove_from_list(struct gc_candidate *cand)
{
        struct candidate_list *list = cand->list;

        rb_erase(&cand->rb_node, &list->rb_tree);
        list->count--;
}

static void free_candidate(struct super_block *sb, struct gc_candidate *cand)
{
        struct logfs_super *super = logfs_super(sb);

        btree_remove32(&super->s_cand_tree, cand->segno);
        kfree(cand);
}

u32 get_best_cand(struct super_block *sb, struct candidate_list *list, u32 *ec)
{
        struct gc_candidate *cand;
        u32 segno;

        BUG_ON(list->count == 0);

        cand = rb_entry(rb_first(&list->rb_tree), struct gc_candidate, rb_node);
        remove_from_list(cand);
        segno = cand->segno;
        if (ec)
                *ec = cand->erase_count;
        free_candidate(sb, cand);
        return segno;
}

/*
 * We have several lists to manage segments with.  The reserve_list is used to
 * deal with bad blocks.  We try to keep the best (lowest ec) segments on this
 * list.
 * The free_list contains free segments for normal usage.  It usually gets the
 * second pick after the reserve_list.  But when the free_list is running short
 * it is more important to keep the free_list full than to keep a reserve.
 *
 * Segments that are not free are put onto a per-level low_list.  If we have
 * to run garbage collection, we pick a candidate from there.  All segments on
 * those lists should have at least some free space so GC will make progress.
 *
 * And last we have the ec_list, which is used to pick segments for wear
 * leveling.
 *
 * If all appropriate lists are full, we simply free the candidate and forget
 * about that segment for a while.  We have better candidates for each purpose.
 */
static void __add_candidate(struct super_block *sb, struct gc_candidate *cand)
{
        struct logfs_super *super = logfs_super(sb);
        u32 full = super->s_segsize - LOGFS_SEGMENT_RESERVE;

        if (cand->valid == 0) {
                /* 100% free segments */
                log_gc_noisy("add reserve segment %x (ec %x) at %llx\n",
                                cand->segno, cand->erase_count,
                                dev_ofs(sb, cand->segno, 0));
                cand = add_list(cand, &super->s_reserve_list);
                if (cand) {
                        log_gc_noisy("add free segment %x (ec %x) at %llx\n",
                                        cand->segno, cand->erase_count,
                                        dev_ofs(sb, cand->segno, 0));
                        cand = add_list(cand, &super->s_free_list);
                }
        } else {
                /* good candidates for Garbage Collection */
                if (cand->valid < full)
                        cand = add_list(cand, &super->s_low_list[cand->dist]);
                /* good candidates for wear leveling,
                 * segments that were recently written get ignored */
                if (cand)
                        cand = add_list(cand, &super->s_ec_list);
        }
        if (cand)
                free_candidate(sb, cand);
}

static int add_candidate(struct super_block *sb, u32 segno, u32 valid, u32 ec,
                u8 dist)
{
        struct logfs_super *super = logfs_super(sb);
        struct gc_candidate *cand;

        cand = kmalloc(sizeof(*cand), GFP_NOFS);
        if (!cand)
                return -ENOMEM;

        cand->segno = segno;
        cand->valid = valid;
        cand->erase_count = ec;
        cand->dist = dist;

        btree_insert32(&super->s_cand_tree, segno, cand, GFP_NOFS);
        __add_candidate(sb, cand);
        return 0;
}

static void remove_segment_from_lists(struct super_block *sb, u32 segno)
{
        struct logfs_super *super = logfs_super(sb);
        struct gc_candidate *cand;

        cand = btree_lookup32(&super->s_cand_tree, segno);
        if (cand) {
                remove_from_list(cand);
                free_candidate(sb, cand);
        }
}

static void scan_segment(struct super_block *sb, u32 segno)
{
        u32 valid, ec = 0;
        gc_level_t gc_level = 0;
        u8 dist;

        if (segment_is_reserved(sb, segno))
                return;

        remove_segment_from_lists(sb, segno);
        valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
        if (valid == RESERVED)
                return;

        dist = root_distance(sb, gc_level);
        add_candidate(sb, segno, valid, ec, dist);
}

static struct gc_candidate *first_in_list(struct candidate_list *list)
{
        if (list->count == 0)
                return NULL;
        return rb_entry(rb_first(&list->rb_tree), struct gc_candidate, rb_node);
}

/*
 * Find the best segment for garbage collection.  Main criterion is
 * the segment requiring the least effort to clean.  Secondary
 * criterion is to GC on the lowest level available.
 *
 * So we search the least effort segment on the lowest level first,
 * then move up and pick another segment iff it requires significantly
 * less effort.  Hence the LOGFS_MAX_OBJECTSIZE in the comparison.
 */
static struct gc_candidate *get_candidate(struct super_block *sb)
{
        struct logfs_super *super = logfs_super(sb);
        int i, max_dist;
        struct gc_candidate *cand = NULL, *this;

        max_dist = min(no_free_segments(sb), LOGFS_NO_AREAS - 1);

        for (i = max_dist; i >= 0; i--) {
                this = first_in_list(&super->s_low_list[i]);
                if (!this)
                        continue;
                if (!cand)
                        cand = this;
                if (this->valid + LOGFS_MAX_OBJECTSIZE <= cand->valid)
                        cand = this;
        }
        return cand;
}
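
/*
 * Worked example for the comparison above (byte counts made up): if the best
 * candidate found so far has 20000 valid bytes, a candidate from the next
 * smaller root distance only replaces it when it has at most
 * 20000 - LOGFS_MAX_OBJECTSIZE valid bytes; smaller improvements keep the
 * pick on the lower level.
 */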

static int __logfs_gc_once(struct super_block *sb, struct gc_candidate *cand)
{
        struct logfs_super *super = logfs_super(sb);
        gc_level_t gc_level;
        u32 cleaned, valid, segno, ec;
        u8 dist;

        if (!cand) {
                log_gc("GC attempted, but no candidate found\n");
                return 0;
        }

        segno = cand->segno;
        dist = cand->dist;
        valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
        free_candidate(sb, cand);
        log_gc("GC segment #%02x at %llx, %x required, %x free, %x valid, %llx free\n",
                        segno, (u64)segno << super->s_segshift,
                        dist, no_free_segments(sb), valid,
                        super->s_free_bytes);
        cleaned = logfs_gc_segment(sb, segno);
        log_gc("GC segment #%02x complete - now %x valid\n", segno,
                        valid - cleaned);
        BUG_ON(cleaned != valid);
        return 1;
}

static int logfs_gc_once(struct super_block *sb)
{
        struct gc_candidate *cand;

        cand = get_candidate(sb);
        if (cand)
                remove_from_list(cand);
        return __logfs_gc_once(sb, cand);
}

/* returns 1 if a wrap occurs, 0 otherwise */
static int logfs_scan_some(struct super_block *sb)
{
        struct logfs_super *super = logfs_super(sb);
        u32 segno;
        int i, ret = 0;

        segno = super->s_sweeper;
        for (i = SCAN_RATIO; i > 0; i--) {
                segno++;
                if (segno >= super->s_no_segs) {
                        segno = 0;
                        ret = 1;
                        /* Break out of the loop.  We want to read a single
                         * block from the segment size on next invocation if
                         * SCAN_RATIO is set to match block size
                         */
                        break;
                }

                scan_segment(sb, segno);
        }
        super->s_sweeper = segno;
        return ret;
}

/*
 * In principle, this function should loop forever, looking for GC candidates
 * and moving data.  LogFS is designed in such a way that this loop is
 * guaranteed to terminate.
 *
 * Limiting the loop to some iterations serves purely to catch cases when
 * these guarantees have failed.  An actual endless loop is an obvious bug
 * and should be reported as such.
 */
static void __logfs_gc_pass(struct super_block *sb, int target)
{
        struct logfs_super *super = logfs_super(sb);
        struct logfs_block *block;
        int round, progress, last_progress = 0;

        /*
         * Doing too many changes to the segfile at once would result
         * in a large number of aliases.  Write the journal before
         * things get out of hand.
         */
        if (super->s_shadow_tree.no_shadowed_segments >= MAX_OBJ_ALIASES)
                logfs_write_anchor(sb);

        if (no_free_segments(sb) >= target &&
                        super->s_no_object_aliases < MAX_OBJ_ALIASES)
                return;

        log_gc("__logfs_gc_pass(%x)\n", target);
        for (round = 0; round < SCAN_ROUNDS; ) {
                if (no_free_segments(sb) >= target)
                        goto write_alias;

                /* Sync in-memory state with on-medium state in case they
                 * diverged */
                logfs_write_anchor(sb);
                round += logfs_scan_some(sb);
                if (no_free_segments(sb) >= target)
                        goto write_alias;
                progress = logfs_gc_once(sb);
                if (progress)
                        last_progress = round;
                else if (round - last_progress > 2)
                        break;
                continue;

                /*
                 * The goto logic is nasty, I just don't know a better way to
                 * code it.  GC is supposed to ensure two things:
                 * 1. Enough free segments are available.
                 * 2. The number of aliases is bounded.
                 * When 1. is achieved, we take a look at 2. and write back
                 * some alias-containing blocks, if necessary.  However, after
                 * each such write we need to go back to 1., as writes can
                 * consume free segments.
                 */
write_alias:
                if (super->s_no_object_aliases < MAX_OBJ_ALIASES)
                        return;
                if (list_empty(&super->s_object_alias)) {
                        /* All aliases are still in btree */
                        return;
                }
                log_gc("Write back one alias\n");
                block = list_entry(super->s_object_alias.next,
                                struct logfs_block, alias_list);
                block->ops->write_block(block);
                /*
                 * To round off the nasty goto logic, we reset round here.  It
                 * is a safety-net for GC not making any progress and limited
                 * to something reasonably small.  If we incremented it for every
                 * single alias, the loop could terminate rather quickly.
                 */
                round = 0;
        }
        LOGFS_BUG(sb);
}

static int wl_ratelimit(struct super_block *sb, u64 *next_event)
{
        struct logfs_super *super = logfs_super(sb);

        if (*next_event < super->s_gec) {
                *next_event = super->s_gec + WL_RATELIMIT;
                return 0;
        }
        return 1;
}
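
/*
 * Example: if s_gec is currently 1000 and *next_event is 900, the check
 * above fires, wear leveling proceeds and *next_event becomes 1100; further
 * calls are then suppressed until the global erase counter passes 1100.
 */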

static void logfs_wl_pass(struct super_block *sb)
{
        struct logfs_super *super = logfs_super(sb);
        struct gc_candidate *wl_cand, *free_cand;

        if (wl_ratelimit(sb, &super->s_wl_gec_ostore))
                return;

        wl_cand = first_in_list(&super->s_ec_list);
        if (!wl_cand)
                return;
        free_cand = first_in_list(&super->s_free_list);
        if (!free_cand)
                return;

        if (wl_cand->erase_count < free_cand->erase_count + WL_DELTA) {
                remove_from_list(wl_cand);
                __logfs_gc_once(sb, wl_cand);
        }
}

/*
 * The journal needs wear leveling as well.  But moving the journal is an
 * expensive operation so we try to avoid it as much as possible.  And if we
 * have to do it, we move the whole journal, not individual segments.
 *
 * Ratelimiting is not strictly necessary here, it mainly serves to avoid the
 * calculations.  First we check whether moving the journal would be a
 * significant improvement.  That means that a) the current journal segments
 * have more wear than the future journal segments and b) the current journal
 * segments have more wear than normal ostore segments.
 * Rationale for b) is that we don't have to move the journal if it is aging
 * less than the ostore, even if the reserve segments age even less (they are
 * excluded from wear leveling, after all).
 * Next we check that the superblocks have less wear than the journal.  Since
 * moving the journal requires writing the superblocks, we have to protect the
 * superblocks even more than the journal.
 *
 * Also we double the acceptable wear difference, compared to ostore wear
 * leveling.  Journal data is read and rewritten rapidly, comparatively.  So
 * soft errors have much less time to accumulate and we allow the journal to
 * be a bit worse than the ostore.
 */
static void logfs_journal_wl_pass(struct super_block *sb)
{
        struct logfs_super *super = logfs_super(sb);
        struct gc_candidate *cand;
        u32 min_journal_ec = -1, max_reserve_ec = 0;
        int i;

        if (wl_ratelimit(sb, &super->s_wl_gec_journal))
                return;

        if (super->s_reserve_list.count < super->s_no_journal_segs) {
                /* Reserve is not full enough to move complete journal */
                return;
        }

        journal_for_each(i)
                if (super->s_journal_seg[i])
                        min_journal_ec = min(min_journal_ec,
                                        super->s_journal_ec[i]);
        cand = rb_entry(rb_first(&super->s_free_list.rb_tree),
                        struct gc_candidate, rb_node);
        max_reserve_ec = cand->erase_count;
        for (i = 0; i < 2; i++) {
                struct logfs_segment_entry se;
                u32 segno = seg_no(sb, super->s_sb_ofs[i]);
                u32 ec;

                logfs_get_segment_entry(sb, segno, &se);
                ec = be32_to_cpu(se.ec_level) >> 4;
                max_reserve_ec = max(max_reserve_ec, ec);
        }

        if (min_journal_ec > max_reserve_ec + 2 * WL_DELTA) {
                do_logfs_journal_wl_pass(sb);
        }
}
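
/*
 * Worked example for the threshold above (numbers made up): with
 * max_reserve_ec at 1000 and WL_DELTA at 397, the journal is only moved once
 * its least worn segment has an erase count above 1000 + 2 * 397 = 1794.
 */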

void logfs_gc_pass(struct super_block *sb)
{
        struct logfs_super *super = logfs_super(sb);

        //BUG_ON(mutex_trylock(&logfs_super(sb)->s_w_mutex));
        /* Write journal before free space is getting saturated with dirty
         * objects.
         */
        if (super->s_dirty_used_bytes + super->s_dirty_free_bytes
                        + LOGFS_MAX_OBJECTSIZE >= super->s_free_bytes)
                logfs_write_anchor(sb);
        __logfs_gc_pass(sb, super->s_total_levels);
        logfs_wl_pass(sb);
        logfs_journal_wl_pass(sb);
}

static int check_area(struct super_block *sb, int i)
{
        struct logfs_super *super = logfs_super(sb);
        struct logfs_area *area = super->s_area[i];
        gc_level_t gc_level;
        u32 cleaned, valid, ec;
        u32 segno = area->a_segno;
        u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);

        if (!area->a_is_open)
                return 0;

        if (super->s_devops->can_write_buf(sb, ofs) == 0)
                return 0;

        printk(KERN_INFO"LogFS: Possibly incomplete write at %llx\n", ofs);
        /*
         * The device cannot write back the write buffer.  Most likely the
         * wbuf was already written out and the system crashed at some point
         * before the journal commit happened.  In that case we wouldn't have
         * to do anything.  But if the crash happened before the wbuf was
         * written out correctly, we must GC this segment.  So assume the
         * worst and always do the GC run.
         */
        area->a_is_open = 0;
        valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
        cleaned = logfs_gc_segment(sb, segno);
        if (cleaned != valid)
                return -EIO;
        return 0;
}

int logfs_check_areas(struct super_block *sb)
{
        int i, err;

        for_each_area(i) {
                err = check_area(sb, i);
                if (err)
                        return err;
        }
        return 0;
}

static void logfs_init_candlist(struct candidate_list *list, int maxcount,
                int sort_by_ec)
{
        list->count = 0;
        list->maxcount = maxcount;
        list->sort_by_ec = sort_by_ec;
        list->rb_tree = RB_ROOT;
}

int logfs_init_gc(struct super_block *sb)
{
        struct logfs_super *super = logfs_super(sb);
        int i;

        btree_init_mempool32(&super->s_cand_tree, super->s_btree_pool);
        logfs_init_candlist(&super->s_free_list, LIST_SIZE + SCAN_RATIO, 1);
        logfs_init_candlist(&super->s_reserve_list,
                        super->s_bad_seg_reserve, 1);
        for_each_area(i)
                logfs_init_candlist(&super->s_low_list[i], LIST_SIZE, 0);
        logfs_init_candlist(&super->s_ec_list, LIST_SIZE, 1);
        return 0;
}

static void logfs_cleanup_list(struct super_block *sb,
                struct candidate_list *list)
{
        struct gc_candidate *cand;

        while (list->count) {
                cand = rb_entry(list->rb_tree.rb_node, struct gc_candidate,
                                rb_node);
                remove_from_list(cand);
                free_candidate(sb, cand);
        }
        BUG_ON(list->rb_tree.rb_node);
}

void logfs_cleanup_gc(struct super_block *sb)
{
        struct logfs_super *super = logfs_super(sb);
        int i;

        if (!super->s_free_list.count)
                return;

        /*
         * FIXME: The btree may still contain a single empty node.  So we
         * call the grim visitor to clean up that mess.  Btree code should
         * do it for us, really.
         */
        btree_grim_visitor32(&super->s_cand_tree, 0, NULL);
        logfs_cleanup_list(sb, &super->s_free_list);
        logfs_cleanup_list(sb, &super->s_reserve_list);
        for_each_area(i)
                logfs_cleanup_list(sb, &super->s_low_list[i]);
        logfs_cleanup_list(sb, &super->s_ec_list);
}