linux/fs/logfs/segment.c
<<
>>
Prefs
   1/*
   2 * fs/logfs/segment.c   - Handling the Object Store
   3 *
   4 * As should be obvious for Linux kernel code, license is GPLv2
   5 *
   6 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
   7 *
   8 * Object store or ostore makes up the complete device with exception of
   9 * the superblock and journal areas.  Apart from its own metadata it stores
  10 * three kinds of objects: inodes, dentries and blocks, both data and indirect.
  11 */
  12#include "logfs.h"
  13#include <linux/slab.h>
  14
  15static int logfs_mark_segment_bad(struct super_block *sb, u32 segno)
  16{
  17        struct logfs_super *super = logfs_super(sb);
  18        struct btree_head32 *head = &super->s_reserved_segments;
  19        int err;
  20
  21        err = btree_insert32(head, segno, (void *)1, GFP_NOFS);
  22        if (err)
  23                return err;
  24        logfs_super(sb)->s_bad_segments++;
  25        /* FIXME: write to journal */
  26        return 0;
  27}
  28
  29int logfs_erase_segment(struct super_block *sb, u32 segno, int ensure_erase)
  30{
  31        struct logfs_super *super = logfs_super(sb);
  32
  33        super->s_gec++;
  34
  35        return super->s_devops->erase(sb, (u64)segno << super->s_segshift,
  36                        super->s_segsize, ensure_erase);
  37}
  38
  39static s64 logfs_get_free_bytes(struct logfs_area *area, size_t bytes)
  40{
  41        s32 ofs;
  42
  43        logfs_open_area(area, bytes);
  44
  45        ofs = area->a_used_bytes;
  46        area->a_used_bytes += bytes;
  47        BUG_ON(area->a_used_bytes >= logfs_super(area->a_sb)->s_segsize);
  48
  49        return dev_ofs(area->a_sb, area->a_segno, ofs);
  50}
  51
  52static struct page *get_mapping_page(struct super_block *sb, pgoff_t index,
  53                int use_filler)
  54{
  55        struct logfs_super *super = logfs_super(sb);
  56        struct address_space *mapping = super->s_mapping_inode->i_mapping;
  57        filler_t *filler = super->s_devops->readpage;
  58        struct page *page;
  59
  60        BUG_ON(mapping_gfp_mask(mapping) & __GFP_FS);
  61        if (use_filler)
  62                page = read_cache_page(mapping, index, filler, sb);
  63        else {
  64                page = find_or_create_page(mapping, index, GFP_NOFS);
  65                unlock_page(page);
  66        }
  67        return page;
  68}
  69
  70int __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
  71                int use_filler)
  72{
  73        pgoff_t index = ofs >> PAGE_SHIFT;
  74        struct page *page;
  75        long offset = ofs & (PAGE_SIZE-1);
  76        long copylen;
  77
  78        /* Only logfs_wbuf_recover may use len==0 */
  79        BUG_ON(!len && !use_filler);
  80        do {
  81                copylen = min((ulong)len, PAGE_SIZE - offset);
  82
  83                page = get_mapping_page(area->a_sb, index, use_filler);
  84                if (IS_ERR(page))
  85                        return PTR_ERR(page);
  86                BUG_ON(!page); /* FIXME: reserve a pool */
  87                SetPageUptodate(page);
  88                memcpy(page_address(page) + offset, buf, copylen);
  89                SetPagePrivate(page);
  90                page_cache_release(page);
  91
  92                buf += copylen;
  93                len -= copylen;
  94                offset = 0;
  95                index++;
  96        } while (len);
  97        return 0;
  98}
  99
 100static void pad_partial_page(struct logfs_area *area)
 101{
 102        struct super_block *sb = area->a_sb;
 103        struct page *page;
 104        u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
 105        pgoff_t index = ofs >> PAGE_SHIFT;
 106        long offset = ofs & (PAGE_SIZE-1);
 107        u32 len = PAGE_SIZE - offset;
 108
 109        if (len % PAGE_SIZE) {
 110                page = get_mapping_page(sb, index, 0);
 111                BUG_ON(!page); /* FIXME: reserve a pool */
 112                memset(page_address(page) + offset, 0xff, len);
 113                SetPagePrivate(page);
 114                page_cache_release(page);
 115        }
 116}
 117
 118static void pad_full_pages(struct logfs_area *area)
 119{
 120        struct super_block *sb = area->a_sb;
 121        struct logfs_super *super = logfs_super(sb);
 122        u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
 123        u32 len = super->s_segsize - area->a_used_bytes;
 124        pgoff_t index = PAGE_CACHE_ALIGN(ofs) >> PAGE_CACHE_SHIFT;
 125        pgoff_t no_indizes = len >> PAGE_CACHE_SHIFT;
 126        struct page *page;
 127
 128        while (no_indizes) {
 129                page = get_mapping_page(sb, index, 0);
 130                BUG_ON(!page); /* FIXME: reserve a pool */
 131                SetPageUptodate(page);
 132                memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
 133                SetPagePrivate(page);
 134                page_cache_release(page);
 135                index++;
 136                no_indizes--;
 137        }
 138}
 139
 140/*
 141 * bdev_writeseg will write full pages.  Memset the tail to prevent data leaks.
 142 * Also make sure we allocate (and memset) all pages for final writeout.
 143 */
 144static void pad_wbuf(struct logfs_area *area, int final)
 145{
 146        pad_partial_page(area);
 147        if (final)
 148                pad_full_pages(area);
 149}
 150
 151/*
 152 * We have to be careful with the alias tree.  Since lookup is done by bix,
 153 * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with
 154 * indirect blocks.  So always use it through accessor functions.
 155 */
 156static void *alias_tree_lookup(struct super_block *sb, u64 ino, u64 bix,
 157                level_t level)
 158{
 159        struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree;
 160        pgoff_t index = logfs_pack_index(bix, level);
 161
 162        return btree_lookup128(head, ino, index);
 163}
 164
 165static int alias_tree_insert(struct super_block *sb, u64 ino, u64 bix,
 166                level_t level, void *val)
 167{
 168        struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree;
 169        pgoff_t index = logfs_pack_index(bix, level);
 170
 171        return btree_insert128(head, ino, index, val, GFP_NOFS);
 172}
 173
 174static int btree_write_alias(struct super_block *sb, struct logfs_block *block,
 175                write_alias_t *write_one_alias)
 176{
 177        struct object_alias_item *item;
 178        int err;
 179
 180        list_for_each_entry(item, &block->item_list, list) {
 181                err = write_alias_journal(sb, block->ino, block->bix,
 182                                block->level, item->child_no, item->val);
 183                if (err)
 184                        return err;
 185        }
 186        return 0;
 187}
 188
 189static struct logfs_block_ops btree_block_ops = {
 190        .write_block    = btree_write_block,
 191        .free_block     = __free_block,
 192        .write_alias    = btree_write_alias,
 193};
 194
 195int logfs_load_object_aliases(struct super_block *sb,
 196                struct logfs_obj_alias *oa, int count)
 197{
 198        struct logfs_super *super = logfs_super(sb);
 199        struct logfs_block *block;
 200        struct object_alias_item *item;
 201        u64 ino, bix;
 202        level_t level;
 203        int i, err;
 204
 205        super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS;
 206        count /= sizeof(*oa);
 207        for (i = 0; i < count; i++) {
 208                item = mempool_alloc(super->s_alias_pool, GFP_NOFS);
 209                if (!item)
 210                        return -ENOMEM;
 211                memset(item, 0, sizeof(*item));
 212
 213                super->s_no_object_aliases++;
 214                item->val = oa[i].val;
 215                item->child_no = be16_to_cpu(oa[i].child_no);
 216
 217                ino = be64_to_cpu(oa[i].ino);
 218                bix = be64_to_cpu(oa[i].bix);
 219                level = LEVEL(oa[i].level);
 220
 221                log_aliases("logfs_load_object_aliases(%llx, %llx, %x, %x) %llx\n",
 222                                ino, bix, level, item->child_no,
 223                                be64_to_cpu(item->val));
 224                block = alias_tree_lookup(sb, ino, bix, level);
 225                if (!block) {
 226                        block = __alloc_block(sb, ino, bix, level);
 227                        block->ops = &btree_block_ops;
 228                        err = alias_tree_insert(sb, ino, bix, level, block);
 229                        BUG_ON(err); /* mempool empty */
 230                }
 231                if (test_and_set_bit(item->child_no, block->alias_map)) {
 232                        printk(KERN_ERR"LogFS: Alias collision detected\n");
 233                        return -EIO;
 234                }
 235                list_move_tail(&block->alias_list, &super->s_object_alias);
 236                list_add(&item->list, &block->item_list);
 237        }
 238        return 0;
 239}
 240
 241static void kill_alias(void *_block, unsigned long ignore0,
 242                u64 ignore1, u64 ignore2, size_t ignore3)
 243{
 244        struct logfs_block *block = _block;
 245        struct super_block *sb = block->sb;
 246        struct logfs_super *super = logfs_super(sb);
 247        struct object_alias_item *item;
 248
 249        while (!list_empty(&block->item_list)) {
 250                item = list_entry(block->item_list.next, typeof(*item), list);
 251                list_del(&item->list);
 252                mempool_free(item, super->s_alias_pool);
 253        }
 254        block->ops->free_block(sb, block);
 255}
 256
 257static int obj_type(struct inode *inode, level_t level)
 258{
 259        if (level == 0) {
 260                if (S_ISDIR(inode->i_mode))
 261                        return OBJ_DENTRY;
 262                if (inode->i_ino == LOGFS_INO_MASTER)
 263                        return OBJ_INODE;
 264        }
 265        return OBJ_BLOCK;
 266}
 267
 268static int obj_len(struct super_block *sb, int obj_type)
 269{
 270        switch (obj_type) {
 271        case OBJ_DENTRY:
 272                return sizeof(struct logfs_disk_dentry);
 273        case OBJ_INODE:
 274                return sizeof(struct logfs_disk_inode);
 275        case OBJ_BLOCK:
 276                return sb->s_blocksize;
 277        default:
 278                BUG();
 279        }
 280}
 281
 282static int __logfs_segment_write(struct inode *inode, void *buf,
 283                struct logfs_shadow *shadow, int type, int len, int compr)
 284{
 285        struct logfs_area *area;
 286        struct super_block *sb = inode->i_sb;
 287        s64 ofs;
 288        struct logfs_object_header h;
 289        int acc_len;
 290
 291        if (shadow->gc_level == 0)
 292                acc_len = len;
 293        else
 294                acc_len = obj_len(sb, type);
 295
 296        area = get_area(sb, shadow->gc_level);
 297        ofs = logfs_get_free_bytes(area, len + LOGFS_OBJECT_HEADERSIZE);
 298        LOGFS_BUG_ON(ofs <= 0, sb);
 299        /*
 300         * Order is important.  logfs_get_free_bytes(), by modifying the
 301         * segment file, may modify the content of the very page we're about
 302         * to write now.  Which is fine, as long as the calculated crc and
 303         * written data still match.  So do the modifications _before_
 304         * calculating the crc.
 305         */
 306
 307        h.len   = cpu_to_be16(len);
 308        h.type  = type;
 309        h.compr = compr;
 310        h.ino   = cpu_to_be64(inode->i_ino);
 311        h.bix   = cpu_to_be64(shadow->bix);
 312        h.crc   = logfs_crc32(&h, sizeof(h) - 4, 4);
 313        h.data_crc = logfs_crc32(buf, len, 0);
 314
 315        logfs_buf_write(area, ofs, &h, sizeof(h));
 316        logfs_buf_write(area, ofs + LOGFS_OBJECT_HEADERSIZE, buf, len);
 317
 318        shadow->new_ofs = ofs;
 319        shadow->new_len = acc_len + LOGFS_OBJECT_HEADERSIZE;
 320
 321        return 0;
 322}
 323
 324static s64 logfs_segment_write_compress(struct inode *inode, void *buf,
 325                struct logfs_shadow *shadow, int type, int len)
 326{
 327        struct super_block *sb = inode->i_sb;
 328        void *compressor_buf = logfs_super(sb)->s_compressed_je;
 329        ssize_t compr_len;
 330        int ret;
 331
 332        mutex_lock(&logfs_super(sb)->s_journal_mutex);
 333        compr_len = logfs_compress(buf, compressor_buf, len, len);
 334
 335        if (compr_len >= 0) {
 336                ret = __logfs_segment_write(inode, compressor_buf, shadow,
 337                                type, compr_len, COMPR_ZLIB);
 338        } else {
 339                ret = __logfs_segment_write(inode, buf, shadow, type, len,
 340                                COMPR_NONE);
 341        }
 342        mutex_unlock(&logfs_super(sb)->s_journal_mutex);
 343        return ret;
 344}
 345
 346/**
 347 * logfs_segment_write - write data block to object store
 348 * @inode:              inode containing data
 349 *
 350 * Returns an errno or zero.
 351 */
 352int logfs_segment_write(struct inode *inode, struct page *page,
 353                struct logfs_shadow *shadow)
 354{
 355        struct super_block *sb = inode->i_sb;
 356        struct logfs_super *super = logfs_super(sb);
 357        int do_compress, type, len;
 358        int ret;
 359        void *buf;
 360
 361        super->s_flags |= LOGFS_SB_FLAG_DIRTY;
 362        BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN);
 363        do_compress = logfs_inode(inode)->li_flags & LOGFS_IF_COMPRESSED;
 364        if (shadow->gc_level != 0) {
 365                /* temporarily disable compression for indirect blocks */
 366                do_compress = 0;
 367        }
 368
 369        type = obj_type(inode, shrink_level(shadow->gc_level));
 370        len = obj_len(sb, type);
 371        buf = kmap(page);
 372        if (do_compress)
 373                ret = logfs_segment_write_compress(inode, buf, shadow, type,
 374                                len);
 375        else
 376                ret = __logfs_segment_write(inode, buf, shadow, type, len,
 377                                COMPR_NONE);
 378        kunmap(page);
 379
 380        log_segment("logfs_segment_write(%llx, %llx, %x) %llx->%llx %x->%x\n",
 381                        shadow->ino, shadow->bix, shadow->gc_level,
 382                        shadow->old_ofs, shadow->new_ofs,
 383                        shadow->old_len, shadow->new_len);
 384        /* this BUG_ON did catch a locking bug.  useful */
 385        BUG_ON(!(shadow->new_ofs & (super->s_segsize - 1)));
 386        return ret;
 387}
 388
 389int wbuf_read(struct super_block *sb, u64 ofs, size_t len, void *buf)
 390{
 391        pgoff_t index = ofs >> PAGE_SHIFT;
 392        struct page *page;
 393        long offset = ofs & (PAGE_SIZE-1);
 394        long copylen;
 395
 396        while (len) {
 397                copylen = min((ulong)len, PAGE_SIZE - offset);
 398
 399                page = get_mapping_page(sb, index, 1);
 400                if (IS_ERR(page))
 401                        return PTR_ERR(page);
 402                memcpy(buf, page_address(page) + offset, copylen);
 403                page_cache_release(page);
 404
 405                buf += copylen;
 406                len -= copylen;
 407                offset = 0;
 408                index++;
 409        }
 410        return 0;
 411}
 412
 413/*
 414 * The "position" of indirect blocks is ambiguous.  It can be the position
 415 * of any data block somewhere behind this indirect block.  So we need to
 416 * normalize the positions through logfs_block_mask() before comparing.
 417 */
 418static int check_pos(struct super_block *sb, u64 pos1, u64 pos2, level_t level)
 419{
 420        return  (pos1 & logfs_block_mask(sb, level)) !=
 421                (pos2 & logfs_block_mask(sb, level));
 422}
 423
 424#if 0
 425static int read_seg_header(struct super_block *sb, u64 ofs,
 426                struct logfs_segment_header *sh)
 427{
 428        __be32 crc;
 429        int err;
 430
 431        err = wbuf_read(sb, ofs, sizeof(*sh), sh);
 432        if (err)
 433                return err;
 434        crc = logfs_crc32(sh, sizeof(*sh), 4);
 435        if (crc != sh->crc) {
 436                printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, "
 437                                "got %x\n", ofs, be32_to_cpu(sh->crc),
 438                                be32_to_cpu(crc));
 439                return -EIO;
 440        }
 441        return 0;
 442}
 443#endif
 444
 445static int read_obj_header(struct super_block *sb, u64 ofs,
 446                struct logfs_object_header *oh)
 447{
 448        __be32 crc;
 449        int err;
 450
 451        err = wbuf_read(sb, ofs, sizeof(*oh), oh);
 452        if (err)
 453                return err;
 454        crc = logfs_crc32(oh, sizeof(*oh) - 4, 4);
 455        if (crc != oh->crc) {
 456                printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, "
 457                                "got %x\n", ofs, be32_to_cpu(oh->crc),
 458                                be32_to_cpu(crc));
 459                return -EIO;
 460        }
 461        return 0;
 462}
 463
 464static void move_btree_to_page(struct inode *inode, struct page *page,
 465                __be64 *data)
 466{
 467        struct super_block *sb = inode->i_sb;
 468        struct logfs_super *super = logfs_super(sb);
 469        struct btree_head128 *head = &super->s_object_alias_tree;
 470        struct logfs_block *block;
 471        struct object_alias_item *item, *next;
 472
 473        if (!(super->s_flags & LOGFS_SB_FLAG_OBJ_ALIAS))
 474                return;
 475
 476        block = btree_remove128(head, inode->i_ino, page->index);
 477        if (!block)
 478                return;
 479
 480        log_blockmove("move_btree_to_page(%llx, %llx, %x)\n",
 481                        block->ino, block->bix, block->level);
 482        list_for_each_entry_safe(item, next, &block->item_list, list) {
 483                data[item->child_no] = item->val;
 484                list_del(&item->list);
 485                mempool_free(item, super->s_alias_pool);
 486        }
 487        block->page = page;
 488        SetPagePrivate(page);
 489        page->private = (unsigned long)block;
 490        block->ops = &indirect_block_ops;
 491        initialize_block_counters(page, block, data, 0);
 492}
 493
 494/*
 495 * This silences a false, yet annoying gcc warning.  I hate it when my editor
 496 * jumps into bitops.h each time I recompile this file.
 497 * TODO: Complain to gcc folks about this and upgrade compiler.
 498 */
 499static unsigned long fnb(const unsigned long *addr,
 500                unsigned long size, unsigned long offset)
 501{
 502        return find_next_bit(addr, size, offset);
 503}
 504
 505void move_page_to_btree(struct page *page)
 506{
 507        struct logfs_block *block = logfs_block(page);
 508        struct super_block *sb = block->sb;
 509        struct logfs_super *super = logfs_super(sb);
 510        struct object_alias_item *item;
 511        unsigned long pos;
 512        __be64 *child;
 513        int err;
 514
 515        if (super->s_flags & LOGFS_SB_FLAG_SHUTDOWN) {
 516                block->ops->free_block(sb, block);
 517                return;
 518        }
 519        log_blockmove("move_page_to_btree(%llx, %llx, %x)\n",
 520                        block->ino, block->bix, block->level);
 521        super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS;
 522
 523        for (pos = 0; ; pos++) {
 524                pos = fnb(block->alias_map, LOGFS_BLOCK_FACTOR, pos);
 525                if (pos >= LOGFS_BLOCK_FACTOR)
 526                        break;
 527
 528                item = mempool_alloc(super->s_alias_pool, GFP_NOFS);
 529                BUG_ON(!item); /* mempool empty */
 530                memset(item, 0, sizeof(*item));
 531
 532                child = kmap_atomic(page, KM_USER0);
 533                item->val = child[pos];
 534                kunmap_atomic(child, KM_USER0);
 535                item->child_no = pos;
 536                list_add(&item->list, &block->item_list);
 537        }
 538        block->page = NULL;
 539        ClearPagePrivate(page);
 540        page->private = 0;
 541        block->ops = &btree_block_ops;
 542        err = alias_tree_insert(block->sb, block->ino, block->bix, block->level,
 543                        block);
 544        BUG_ON(err); /* mempool empty */
 545        ClearPageUptodate(page);
 546}
 547
 548static int __logfs_segment_read(struct inode *inode, void *buf,
 549                u64 ofs, u64 bix, level_t level)
 550{
 551        struct super_block *sb = inode->i_sb;
 552        void *compressor_buf = logfs_super(sb)->s_compressed_je;
 553        struct logfs_object_header oh;
 554        __be32 crc;
 555        u16 len;
 556        int err, block_len;
 557
 558        block_len = obj_len(sb, obj_type(inode, level));
 559        err = read_obj_header(sb, ofs, &oh);
 560        if (err)
 561                goto out_err;
 562
 563        err = -EIO;
 564        if (be64_to_cpu(oh.ino) != inode->i_ino
 565                        || check_pos(sb, be64_to_cpu(oh.bix), bix, level)) {
 566                printk(KERN_ERR"LOGFS: (ino, bix) don't match at %llx: "
 567                                "expected (%lx, %llx), got (%llx, %llx)\n",
 568                                ofs, inode->i_ino, bix,
 569                                be64_to_cpu(oh.ino), be64_to_cpu(oh.bix));
 570                goto out_err;
 571        }
 572
 573        len = be16_to_cpu(oh.len);
 574
 575        switch (oh.compr) {
 576        case COMPR_NONE:
 577                err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len, buf);
 578                if (err)
 579                        goto out_err;
 580                crc = logfs_crc32(buf, len, 0);
 581                if (crc != oh.data_crc) {
 582                        printk(KERN_ERR"LOGFS: uncompressed data crc error at "
 583                                        "%llx: expected %x, got %x\n", ofs,
 584                                        be32_to_cpu(oh.data_crc),
 585                                        be32_to_cpu(crc));
 586                        goto out_err;
 587                }
 588                break;
 589        case COMPR_ZLIB:
 590                mutex_lock(&logfs_super(sb)->s_journal_mutex);
 591                err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len,
 592                                compressor_buf);
 593                if (err) {
 594                        mutex_unlock(&logfs_super(sb)->s_journal_mutex);
 595                        goto out_err;
 596                }
 597                crc = logfs_crc32(compressor_buf, len, 0);
 598                if (crc != oh.data_crc) {
 599                        printk(KERN_ERR"LOGFS: compressed data crc error at "
 600                                        "%llx: expected %x, got %x\n", ofs,
 601                                        be32_to_cpu(oh.data_crc),
 602                                        be32_to_cpu(crc));
 603                        mutex_unlock(&logfs_super(sb)->s_journal_mutex);
 604                        goto out_err;
 605                }
 606                err = logfs_uncompress(compressor_buf, buf, len, block_len);
 607                mutex_unlock(&logfs_super(sb)->s_journal_mutex);
 608                if (err) {
 609                        printk(KERN_ERR"LOGFS: uncompress error at %llx\n", ofs);
 610                        goto out_err;
 611                }
 612                break;
 613        default:
 614                LOGFS_BUG(sb);
 615                err = -EIO;
 616                goto out_err;
 617        }
 618        return 0;
 619
 620out_err:
 621        logfs_set_ro(sb);
 622        printk(KERN_ERR"LOGFS: device is read-only now\n");
 623        LOGFS_BUG(sb);
 624        return err;
 625}
 626
 627/**
 628 * logfs_segment_read - read data block from object store
 629 * @inode:              inode containing data
 630 * @buf:                data buffer
 631 * @ofs:                physical data offset
 632 * @bix:                block index
 633 * @level:              block level
 634 *
 635 * Returns 0 on success or a negative errno.
 636 */
 637int logfs_segment_read(struct inode *inode, struct page *page,
 638                u64 ofs, u64 bix, level_t level)
 639{
 640        int err;
 641        void *buf;
 642
 643        if (PageUptodate(page))
 644                return 0;
 645
 646        ofs &= ~LOGFS_FULLY_POPULATED;
 647
 648        buf = kmap(page);
 649        err = __logfs_segment_read(inode, buf, ofs, bix, level);
 650        if (!err) {
 651                move_btree_to_page(inode, page, buf);
 652                SetPageUptodate(page);
 653        }
 654        kunmap(page);
 655        log_segment("logfs_segment_read(%lx, %llx, %x) %llx (%d)\n",
 656                        inode->i_ino, bix, level, ofs, err);
 657        return err;
 658}
 659
 660int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow)
 661{
 662        struct super_block *sb = inode->i_sb;
 663        struct logfs_super *super = logfs_super(sb);
 664        struct logfs_object_header h;
 665        u16 len;
 666        int err;
 667
 668        super->s_flags |= LOGFS_SB_FLAG_DIRTY;
 669        BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN);
 670        BUG_ON(shadow->old_ofs & LOGFS_FULLY_POPULATED);
 671        if (!shadow->old_ofs)
 672                return 0;
 673
 674        log_segment("logfs_segment_delete(%llx, %llx, %x) %llx->%llx %x->%x\n",
 675                        shadow->ino, shadow->bix, shadow->gc_level,
 676                        shadow->old_ofs, shadow->new_ofs,
 677                        shadow->old_len, shadow->new_len);
 678        err = read_obj_header(sb, shadow->old_ofs, &h);
 679        LOGFS_BUG_ON(err, sb);
 680        LOGFS_BUG_ON(be64_to_cpu(h.ino) != inode->i_ino, sb);
 681        LOGFS_BUG_ON(check_pos(sb, shadow->bix, be64_to_cpu(h.bix),
 682                                shrink_level(shadow->gc_level)), sb);
 683
 684        if (shadow->gc_level == 0)
 685                len = be16_to_cpu(h.len);
 686        else
 687                len = obj_len(sb, h.type);
 688        shadow->old_len = len + sizeof(h);
 689        return 0;
 690}
 691
 692void freeseg(struct super_block *sb, u32 segno)
 693{
 694        struct logfs_super *super = logfs_super(sb);
 695        struct address_space *mapping = super->s_mapping_inode->i_mapping;
 696        struct page *page;
 697        u64 ofs, start, end;
 698
 699        start = dev_ofs(sb, segno, 0);
 700        end = dev_ofs(sb, segno + 1, 0);
 701        for (ofs = start; ofs < end; ofs += PAGE_SIZE) {
 702                page = find_get_page(mapping, ofs >> PAGE_SHIFT);
 703                if (!page)
 704                        continue;
 705                ClearPagePrivate(page);
 706                page_cache_release(page);
 707        }
 708}
 709
 710int logfs_open_area(struct logfs_area *area, size_t bytes)
 711{
 712        struct super_block *sb = area->a_sb;
 713        struct logfs_super *super = logfs_super(sb);
 714        int err, closed = 0;
 715
 716        if (area->a_is_open && area->a_used_bytes + bytes <= super->s_segsize)
 717                return 0;
 718
 719        if (area->a_is_open) {
 720                u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
 721                u32 len = super->s_segsize - area->a_written_bytes;
 722
 723                log_gc("logfs_close_area(%x)\n", area->a_segno);
 724                pad_wbuf(area, 1);
 725                super->s_devops->writeseg(area->a_sb, ofs, len);
 726                freeseg(sb, area->a_segno);
 727                closed = 1;
 728        }
 729
 730        area->a_used_bytes = 0;
 731        area->a_written_bytes = 0;
 732again:
 733        area->a_ops->get_free_segment(area);
 734        area->a_ops->get_erase_count(area);
 735
 736        log_gc("logfs_open_area(%x, %x)\n", area->a_segno, area->a_level);
 737        err = area->a_ops->erase_segment(area);
 738        if (err) {
 739                printk(KERN_WARNING "LogFS: Error erasing segment %x\n",
 740                                area->a_segno);
 741                logfs_mark_segment_bad(sb, area->a_segno);
 742                goto again;
 743        }
 744        area->a_is_open = 1;
 745        return closed;
 746}
 747
 748void logfs_sync_area(struct logfs_area *area)
 749{
 750        struct super_block *sb = area->a_sb;
 751        struct logfs_super *super = logfs_super(sb);
 752        u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
 753        u32 len = (area->a_used_bytes - area->a_written_bytes);
 754
 755        if (super->s_writesize)
 756                len &= ~(super->s_writesize - 1);
 757        if (len == 0)
 758                return;
 759        pad_wbuf(area, 0);
 760        super->s_devops->writeseg(sb, ofs, len);
 761        area->a_written_bytes += len;
 762}
 763
 764void logfs_sync_segments(struct super_block *sb)
 765{
 766        struct logfs_super *super = logfs_super(sb);
 767        int i;
 768
 769        for_each_area(i)
 770                logfs_sync_area(super->s_area[i]);
 771}
 772
 773/*
 774 * Pick a free segment to be used for this area.  Effectively takes a
 775 * candidate from the free list (not really a candidate anymore).
 776 */
 777static void ostore_get_free_segment(struct logfs_area *area)
 778{
 779        struct super_block *sb = area->a_sb;
 780        struct logfs_super *super = logfs_super(sb);
 781
 782        if (super->s_free_list.count == 0) {
 783                printk(KERN_ERR"LOGFS: ran out of free segments\n");
 784                LOGFS_BUG(sb);
 785        }
 786
 787        area->a_segno = get_best_cand(sb, &super->s_free_list, NULL);
 788}
 789
 790static void ostore_get_erase_count(struct logfs_area *area)
 791{
 792        struct logfs_segment_entry se;
 793        u32 ec_level;
 794
 795        logfs_get_segment_entry(area->a_sb, area->a_segno, &se);
 796        BUG_ON(se.ec_level == cpu_to_be32(BADSEG) ||
 797                        se.valid == cpu_to_be32(RESERVED));
 798
 799        ec_level = be32_to_cpu(se.ec_level);
 800        area->a_erase_count = (ec_level >> 4) + 1;
 801}
 802
 803static int ostore_erase_segment(struct logfs_area *area)
 804{
 805        struct super_block *sb = area->a_sb;
 806        struct logfs_segment_header sh;
 807        u64 ofs;
 808        int err;
 809
 810        err = logfs_erase_segment(sb, area->a_segno, 0);
 811        if (err)
 812                return err;
 813
 814        sh.pad = 0;
 815        sh.type = SEG_OSTORE;
 816        sh.level = (__force u8)area->a_level;
 817        sh.segno = cpu_to_be32(area->a_segno);
 818        sh.ec = cpu_to_be32(area->a_erase_count);
 819        sh.gec = cpu_to_be64(logfs_super(sb)->s_gec);
 820        sh.crc = logfs_crc32(&sh, sizeof(sh), 4);
 821
 822        logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count,
 823                        area->a_level);
 824
 825        ofs = dev_ofs(sb, area->a_segno, 0);
 826        area->a_used_bytes = sizeof(sh);
 827        logfs_buf_write(area, ofs, &sh, sizeof(sh));
 828        return 0;
 829}
 830
 831static const struct logfs_area_ops ostore_area_ops = {
 832        .get_free_segment       = ostore_get_free_segment,
 833        .get_erase_count        = ostore_get_erase_count,
 834        .erase_segment          = ostore_erase_segment,
 835};
 836
 837static void free_area(struct logfs_area *area)
 838{
 839        if (area)
 840                freeseg(area->a_sb, area->a_segno);
 841        kfree(area);
 842}
 843
 844static struct logfs_area *alloc_area(struct super_block *sb)
 845{
 846        struct logfs_area *area;
 847
 848        area = kzalloc(sizeof(*area), GFP_KERNEL);
 849        if (!area)
 850                return NULL;
 851
 852        area->a_sb = sb;
 853        return area;
 854}
 855
 856static void map_invalidatepage(struct page *page, unsigned long l)
 857{
 858        BUG();
 859}
 860
 861static int map_releasepage(struct page *page, gfp_t g)
 862{
 863        /* Don't release these pages */
 864        return 0;
 865}
 866
 867static const struct address_space_operations mapping_aops = {
 868        .invalidatepage = map_invalidatepage,
 869        .releasepage    = map_releasepage,
 870        .set_page_dirty = __set_page_dirty_nobuffers,
 871};
 872
 873int logfs_init_mapping(struct super_block *sb)
 874{
 875        struct logfs_super *super = logfs_super(sb);
 876        struct address_space *mapping;
 877        struct inode *inode;
 878
 879        inode = logfs_new_meta_inode(sb, LOGFS_INO_MAPPING);
 880        if (IS_ERR(inode))
 881                return PTR_ERR(inode);
 882        super->s_mapping_inode = inode;
 883        mapping = inode->i_mapping;
 884        mapping->a_ops = &mapping_aops;
 885        /* Would it be possible to use __GFP_HIGHMEM as well? */
 886        mapping_set_gfp_mask(mapping, GFP_NOFS);
 887        return 0;
 888}
 889
 890int logfs_init_areas(struct super_block *sb)
 891{
 892        struct logfs_super *super = logfs_super(sb);
 893        int i = -1;
 894
 895        super->s_alias_pool = mempool_create_kmalloc_pool(600,
 896                        sizeof(struct object_alias_item));
 897        if (!super->s_alias_pool)
 898                return -ENOMEM;
 899
 900        super->s_journal_area = alloc_area(sb);
 901        if (!super->s_journal_area)
 902                goto err;
 903
 904        for_each_area(i) {
 905                super->s_area[i] = alloc_area(sb);
 906                if (!super->s_area[i])
 907                        goto err;
 908                super->s_area[i]->a_level = GC_LEVEL(i);
 909                super->s_area[i]->a_ops = &ostore_area_ops;
 910        }
 911        btree_init_mempool128(&super->s_object_alias_tree,
 912                        super->s_btree_pool);
 913        return 0;
 914
 915err:
 916        for (i--; i >= 0; i--)
 917                free_area(super->s_area[i]);
 918        free_area(super->s_journal_area);
 919        logfs_mempool_destroy(super->s_alias_pool);
 920        return -ENOMEM;
 921}
 922
 923void logfs_cleanup_areas(struct super_block *sb)
 924{
 925        struct logfs_super *super = logfs_super(sb);
 926        int i;
 927
 928        btree_grim_visitor128(&super->s_object_alias_tree, 0, kill_alias);
 929        for_each_area(i)
 930                free_area(super->s_area[i]);
 931        free_area(super->s_journal_area);
 932}
 933