linux/fs/logfs/segment.c
<<
>>
Prefs
   1/*
   2 * fs/logfs/segment.c   - Handling the Object Store
   3 *
   4 * As should be obvious for Linux kernel code, license is GPLv2
   5 *
   6 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
   7 *
   8 * Object store or ostore makes up the complete device with exception of
   9 * the superblock and journal areas.  Apart from its own metadata it stores
  10 * three kinds of objects: inodes, dentries and blocks, both data and indirect.
  11 */
  12#include "logfs.h"
  13#include <linux/slab.h>
  14
  15static int logfs_mark_segment_bad(struct super_block *sb, u32 segno)
  16{
  17        struct logfs_super *super = logfs_super(sb);
  18        struct btree_head32 *head = &super->s_reserved_segments;
  19        int err;
  20
  21        err = btree_insert32(head, segno, (void *)1, GFP_NOFS);
  22        if (err)
  23                return err;
  24        logfs_super(sb)->s_bad_segments++;
  25        /* FIXME: write to journal */
  26        return 0;
  27}
  28
  29int logfs_erase_segment(struct super_block *sb, u32 segno, int ensure_erase)
  30{
  31        struct logfs_super *super = logfs_super(sb);
  32
  33        super->s_gec++;
  34
  35        return super->s_devops->erase(sb, (u64)segno << super->s_segshift,
  36                        super->s_segsize, ensure_erase);
  37}
  38
  39static s64 logfs_get_free_bytes(struct logfs_area *area, size_t bytes)
  40{
  41        s32 ofs;
  42
  43        logfs_open_area(area, bytes);
  44
  45        ofs = area->a_used_bytes;
  46        area->a_used_bytes += bytes;
  47        BUG_ON(area->a_used_bytes >= logfs_super(area->a_sb)->s_segsize);
  48
  49        return dev_ofs(area->a_sb, area->a_segno, ofs);
  50}
  51
  52static struct page *get_mapping_page(struct super_block *sb, pgoff_t index,
  53                int use_filler)
  54{
  55        struct logfs_super *super = logfs_super(sb);
  56        struct address_space *mapping = super->s_mapping_inode->i_mapping;
  57        filler_t *filler = super->s_devops->readpage;
  58        struct page *page;
  59
  60        BUG_ON(mapping_gfp_constraint(mapping, __GFP_FS));
  61        if (use_filler)
  62                page = read_cache_page(mapping, index, filler, sb);
  63        else {
  64                page = find_or_create_page(mapping, index, GFP_NOFS);
  65                if (page)
  66                        unlock_page(page);
  67        }
  68        return page;
  69}
  70
  71int __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
  72                int use_filler)
  73{
  74        pgoff_t index = ofs >> PAGE_SHIFT;
  75        struct page *page;
  76        long offset = ofs & (PAGE_SIZE-1);
  77        long copylen;
  78
  79        /* Only logfs_wbuf_recover may use len==0 */
  80        BUG_ON(!len && !use_filler);
  81        do {
  82                copylen = min((ulong)len, PAGE_SIZE - offset);
  83
  84                page = get_mapping_page(area->a_sb, index, use_filler);
  85                if (IS_ERR(page))
  86                        return PTR_ERR(page);
  87                BUG_ON(!page); /* FIXME: reserve a pool */
  88                SetPageUptodate(page);
  89                memcpy(page_address(page) + offset, buf, copylen);
  90
  91                if (!PagePrivate(page)) {
  92                        SetPagePrivate(page);
  93                        get_page(page);
  94                }
  95                put_page(page);
  96
  97                buf += copylen;
  98                len -= copylen;
  99                offset = 0;
 100                index++;
 101        } while (len);
 102        return 0;
 103}
 104
 105static void pad_partial_page(struct logfs_area *area)
 106{
 107        struct super_block *sb = area->a_sb;
 108        struct page *page;
 109        u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
 110        pgoff_t index = ofs >> PAGE_SHIFT;
 111        long offset = ofs & (PAGE_SIZE-1);
 112        u32 len = PAGE_SIZE - offset;
 113
 114        if (len % PAGE_SIZE) {
 115                page = get_mapping_page(sb, index, 0);
 116                BUG_ON(!page); /* FIXME: reserve a pool */
 117                memset(page_address(page) + offset, 0xff, len);
 118                if (!PagePrivate(page)) {
 119                        SetPagePrivate(page);
 120                        get_page(page);
 121                }
 122                put_page(page);
 123        }
 124}
 125
 126static void pad_full_pages(struct logfs_area *area)
 127{
 128        struct super_block *sb = area->a_sb;
 129        struct logfs_super *super = logfs_super(sb);
 130        u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
 131        u32 len = super->s_segsize - area->a_used_bytes;
 132        pgoff_t index = PAGE_ALIGN(ofs) >> PAGE_SHIFT;
 133        pgoff_t no_indizes = len >> PAGE_SHIFT;
 134        struct page *page;
 135
 136        while (no_indizes) {
 137                page = get_mapping_page(sb, index, 0);
 138                BUG_ON(!page); /* FIXME: reserve a pool */
 139                SetPageUptodate(page);
 140                memset(page_address(page), 0xff, PAGE_SIZE);
 141                if (!PagePrivate(page)) {
 142                        SetPagePrivate(page);
 143                        get_page(page);
 144                }
 145                put_page(page);
 146                index++;
 147                no_indizes--;
 148        }
 149}
 150
 151/*
 152 * bdev_writeseg will write full pages.  Memset the tail to prevent data leaks.
 153 * Also make sure we allocate (and memset) all pages for final writeout.
 154 */
 155static void pad_wbuf(struct logfs_area *area, int final)
 156{
 157        pad_partial_page(area);
 158        if (final)
 159                pad_full_pages(area);
 160}
 161
 162/*
 163 * We have to be careful with the alias tree.  Since lookup is done by bix,
 164 * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with
 165 * indirect blocks.  So always use it through accessor functions.
 166 */
 167static void *alias_tree_lookup(struct super_block *sb, u64 ino, u64 bix,
 168                level_t level)
 169{
 170        struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree;
 171        pgoff_t index = logfs_pack_index(bix, level);
 172
 173        return btree_lookup128(head, ino, index);
 174}
 175
 176static int alias_tree_insert(struct super_block *sb, u64 ino, u64 bix,
 177                level_t level, void *val)
 178{
 179        struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree;
 180        pgoff_t index = logfs_pack_index(bix, level);
 181
 182        return btree_insert128(head, ino, index, val, GFP_NOFS);
 183}
 184
 185static int btree_write_alias(struct super_block *sb, struct logfs_block *block,
 186                write_alias_t *write_one_alias)
 187{
 188        struct object_alias_item *item;
 189        int err;
 190
 191        list_for_each_entry(item, &block->item_list, list) {
 192                err = write_alias_journal(sb, block->ino, block->bix,
 193                                block->level, item->child_no, item->val);
 194                if (err)
 195                        return err;
 196        }
 197        return 0;
 198}
 199
/*
 * Block operations for logfs_blocks that live in the object alias tree;
 * installed by logfs_load_object_aliases() and move_page_to_btree().
 */
static const struct logfs_block_ops btree_block_ops = {
        .write_block    = btree_write_block,
        .free_block     = __free_block,
        .write_alias    = btree_write_alias,
};
 205
 206int logfs_load_object_aliases(struct super_block *sb,
 207                struct logfs_obj_alias *oa, int count)
 208{
 209        struct logfs_super *super = logfs_super(sb);
 210        struct logfs_block *block;
 211        struct object_alias_item *item;
 212        u64 ino, bix;
 213        level_t level;
 214        int i, err;
 215
 216        super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS;
 217        count /= sizeof(*oa);
 218        for (i = 0; i < count; i++) {
 219                item = mempool_alloc(super->s_alias_pool, GFP_NOFS);
 220                if (!item)
 221                        return -ENOMEM;
 222                memset(item, 0, sizeof(*item));
 223
 224                super->s_no_object_aliases++;
 225                item->val = oa[i].val;
 226                item->child_no = be16_to_cpu(oa[i].child_no);
 227
 228                ino = be64_to_cpu(oa[i].ino);
 229                bix = be64_to_cpu(oa[i].bix);
 230                level = LEVEL(oa[i].level);
 231
 232                log_aliases("logfs_load_object_aliases(%llx, %llx, %x, %x) %llx\n",
 233                                ino, bix, level, item->child_no,
 234                                be64_to_cpu(item->val));
 235                block = alias_tree_lookup(sb, ino, bix, level);
 236                if (!block) {
 237                        block = __alloc_block(sb, ino, bix, level);
 238                        block->ops = &btree_block_ops;
 239                        err = alias_tree_insert(sb, ino, bix, level, block);
 240                        BUG_ON(err); /* mempool empty */
 241                }
 242                if (test_and_set_bit(item->child_no, block->alias_map)) {
 243                        printk(KERN_ERR"LogFS: Alias collision detected\n");
 244                        return -EIO;
 245                }
 246                list_move_tail(&block->alias_list, &super->s_object_alias);
 247                list_add(&item->list, &block->item_list);
 248        }
 249        return 0;
 250}
 251
 252static void kill_alias(void *_block, unsigned long ignore0,
 253                u64 ignore1, u64 ignore2, size_t ignore3)
 254{
 255        struct logfs_block *block = _block;
 256        struct super_block *sb = block->sb;
 257        struct logfs_super *super = logfs_super(sb);
 258        struct object_alias_item *item;
 259
 260        while (!list_empty(&block->item_list)) {
 261                item = list_entry(block->item_list.next, typeof(*item), list);
 262                list_del(&item->list);
 263                mempool_free(item, super->s_alias_pool);
 264        }
 265        block->ops->free_block(sb, block);
 266}
 267
 268static int obj_type(struct inode *inode, level_t level)
 269{
 270        if (level == 0) {
 271                if (S_ISDIR(inode->i_mode))
 272                        return OBJ_DENTRY;
 273                if (inode->i_ino == LOGFS_INO_MASTER)
 274                        return OBJ_INODE;
 275        }
 276        return OBJ_BLOCK;
 277}
 278
 279static int obj_len(struct super_block *sb, int obj_type)
 280{
 281        switch (obj_type) {
 282        case OBJ_DENTRY:
 283                return sizeof(struct logfs_disk_dentry);
 284        case OBJ_INODE:
 285                return sizeof(struct logfs_disk_inode);
 286        case OBJ_BLOCK:
 287                return sb->s_blocksize;
 288        default:
 289                BUG();
 290        }
 291}
 292
/*
 * Append one object - an object header followed by @len bytes from @buf -
 * to the area selected by shadow->gc_level, and record the object's device
 * offset and accounted length in @shadow.
 *
 * Always returns 0.
 * NOTE(review): the return values of the two logfs_buf_write() calls are
 * not checked.
 */
static int __logfs_segment_write(struct inode *inode, void *buf,
                struct logfs_shadow *shadow, int type, int len, int compr)
{
        struct logfs_area *area;
        struct super_block *sb = inode->i_sb;
        s64 ofs;
        struct logfs_object_header h;
        int acc_len;

        /*
         * Accounted length: the bytes actually stored for gc_level 0, the
         * nominal object length for every other level.
         */
        if (shadow->gc_level == 0)
                acc_len = len;
        else
                acc_len = obj_len(sb, type);

        area = get_area(sb, shadow->gc_level);
        ofs = logfs_get_free_bytes(area, len + LOGFS_OBJECT_HEADERSIZE);
        LOGFS_BUG_ON(ofs <= 0, sb);
        /*
         * Order is important.  logfs_get_free_bytes(), by modifying the
         * segment file, may modify the content of the very page we're about
         * to write now.  Which is fine, as long as the calculated crc and
         * written data still match.  So do the modifications _before_
         * calculating the crc.
         */

        h.len   = cpu_to_be16(len);
        h.type  = type;
        h.compr = compr;
        h.ino   = cpu_to_be64(inode->i_ino);
        h.bix   = cpu_to_be64(shadow->bix);
        /* Header crc is taken last, once all other header fields are set */
        h.crc   = logfs_crc32(&h, sizeof(h) - 4, 4);
        h.data_crc = logfs_crc32(buf, len, 0);

        /* Header first, payload directly behind it */
        logfs_buf_write(area, ofs, &h, sizeof(h));
        logfs_buf_write(area, ofs + LOGFS_OBJECT_HEADERSIZE, buf, len);

        shadow->new_ofs = ofs;
        shadow->new_len = acc_len + LOGFS_OBJECT_HEADERSIZE;

        return 0;
}
 334
 335static s64 logfs_segment_write_compress(struct inode *inode, void *buf,
 336                struct logfs_shadow *shadow, int type, int len)
 337{
 338        struct super_block *sb = inode->i_sb;
 339        void *compressor_buf = logfs_super(sb)->s_compressed_je;
 340        ssize_t compr_len;
 341        int ret;
 342
 343        mutex_lock(&logfs_super(sb)->s_journal_mutex);
 344        compr_len = logfs_compress(buf, compressor_buf, len, len);
 345
 346        if (compr_len >= 0) {
 347                ret = __logfs_segment_write(inode, compressor_buf, shadow,
 348                                type, compr_len, COMPR_ZLIB);
 349        } else {
 350                ret = __logfs_segment_write(inode, buf, shadow, type, len,
 351                                COMPR_NONE);
 352        }
 353        mutex_unlock(&logfs_super(sb)->s_journal_mutex);
 354        return ret;
 355}
 356
 357/**
 358 * logfs_segment_write - write data block to object store
 359 * @inode:              inode containing data
 360 *
 361 * Returns an errno or zero.
 362 */
 363int logfs_segment_write(struct inode *inode, struct page *page,
 364                struct logfs_shadow *shadow)
 365{
 366        struct super_block *sb = inode->i_sb;
 367        struct logfs_super *super = logfs_super(sb);
 368        int do_compress, type, len;
 369        int ret;
 370        void *buf;
 371
 372        super->s_flags |= LOGFS_SB_FLAG_DIRTY;
 373        BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN);
 374        do_compress = logfs_inode(inode)->li_flags & LOGFS_IF_COMPRESSED;
 375        if (shadow->gc_level != 0) {
 376                /* temporarily disable compression for indirect blocks */
 377                do_compress = 0;
 378        }
 379
 380        type = obj_type(inode, shrink_level(shadow->gc_level));
 381        len = obj_len(sb, type);
 382        buf = kmap(page);
 383        if (do_compress)
 384                ret = logfs_segment_write_compress(inode, buf, shadow, type,
 385                                len);
 386        else
 387                ret = __logfs_segment_write(inode, buf, shadow, type, len,
 388                                COMPR_NONE);
 389        kunmap(page);
 390
 391        log_segment("logfs_segment_write(%llx, %llx, %x) %llx->%llx %x->%x\n",
 392                        shadow->ino, shadow->bix, shadow->gc_level,
 393                        shadow->old_ofs, shadow->new_ofs,
 394                        shadow->old_len, shadow->new_len);
 395        /* this BUG_ON did catch a locking bug.  useful */
 396        BUG_ON(!(shadow->new_ofs & (super->s_segsize - 1)));
 397        return ret;
 398}
 399
 400int wbuf_read(struct super_block *sb, u64 ofs, size_t len, void *buf)
 401{
 402        pgoff_t index = ofs >> PAGE_SHIFT;
 403        struct page *page;
 404        long offset = ofs & (PAGE_SIZE-1);
 405        long copylen;
 406
 407        while (len) {
 408                copylen = min((ulong)len, PAGE_SIZE - offset);
 409
 410                page = get_mapping_page(sb, index, 1);
 411                if (IS_ERR(page))
 412                        return PTR_ERR(page);
 413                memcpy(buf, page_address(page) + offset, copylen);
 414                put_page(page);
 415
 416                buf += copylen;
 417                len -= copylen;
 418                offset = 0;
 419                index++;
 420        }
 421        return 0;
 422}
 423
 424/*
 425 * The "position" of indirect blocks is ambiguous.  It can be the position
 426 * of any data block somewhere behind this indirect block.  So we need to
 427 * normalize the positions through logfs_block_mask() before comparing.
 428 */
 429static int check_pos(struct super_block *sb, u64 pos1, u64 pos2, level_t level)
 430{
 431        return  (pos1 & logfs_block_mask(sb, level)) !=
 432                (pos2 & logfs_block_mask(sb, level));
 433}
 434
/*
 * Compiled out by the surrounding "#if 0".  Reads a segment header from
 * device offset @ofs into @sh and compares its stored crc with one computed
 * by logfs_crc32(); returns the wbuf_read() error, -EIO on a crc mismatch,
 * or 0.
 */
#if 0
static int read_seg_header(struct super_block *sb, u64 ofs,
                struct logfs_segment_header *sh)
{
        __be32 crc;
        int err;

        err = wbuf_read(sb, ofs, sizeof(*sh), sh);
        if (err)
                return err;
        crc = logfs_crc32(sh, sizeof(*sh), 4);
        if (crc != sh->crc) {
                printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, "
                                "got %x\n", ofs, be32_to_cpu(sh->crc),
                                be32_to_cpu(crc));
                return -EIO;
        }
        return 0;
}
#endif
 455
 456static int read_obj_header(struct super_block *sb, u64 ofs,
 457                struct logfs_object_header *oh)
 458{
 459        __be32 crc;
 460        int err;
 461
 462        err = wbuf_read(sb, ofs, sizeof(*oh), oh);
 463        if (err)
 464                return err;
 465        crc = logfs_crc32(oh, sizeof(*oh) - 4, 4);
 466        if (crc != oh->crc) {
 467                printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, "
 468                                "got %x\n", ofs, be32_to_cpu(oh->crc),
 469                                be32_to_cpu(crc));
 470                return -EIO;
 471        }
 472        return 0;
 473}
 474
/*
 * Move pending aliases for (@inode, @page) out of the alias tree and into
 * the freshly read page content @data.
 *
 * Does nothing unless the superblock has LOGFS_SB_FLAG_OBJ_ALIAS set and a
 * block is found in the tree.  Otherwise every queued item's value is
 * stored at data[child_no], the items go back to the alias mempool, and the
 * block is attached to @page and switched to indirect_block_ops.
 */
static void move_btree_to_page(struct inode *inode, struct page *page,
                __be64 *data)
{
        struct super_block *sb = inode->i_sb;
        struct logfs_super *super = logfs_super(sb);
        struct btree_head128 *head = &super->s_object_alias_tree;
        struct logfs_block *block;
        struct object_alias_item *item, *next;

        if (!(super->s_flags & LOGFS_SB_FLAG_OBJ_ALIAS))
                return;

        /*
         * NOTE(review): the tree is keyed by page->index here, while the
         * accessor functions key it by logfs_pack_index(bix, level) -
         * presumably the same value; confirm.
         */
        block = btree_remove128(head, inode->i_ino, page->index);
        if (!block)
                return;

        log_blockmove("move_btree_to_page(%llx, %llx, %x)\n",
                        block->ino, block->bix, block->level);
        /* _safe variant: each item is unlinked and freed while iterating */
        list_for_each_entry_safe(item, next, &block->item_list, list) {
                data[item->child_no] = item->val;
                list_del(&item->list);
                mempool_free(item, super->s_alias_pool);
        }
        block->page = page;

        /* Attach the block as page private data, holding a page reference */
        if (!PagePrivate(page)) {
                SetPagePrivate(page);
                get_page(page);
                set_page_private(page, (unsigned long) block);
        }
        block->ops = &indirect_block_ops;
        initialize_block_counters(page, block, data, 0);
}
 508
 509/*
 510 * This silences a false, yet annoying gcc warning.  I hate it when my editor
 511 * jumps into bitops.h each time I recompile this file.
 512 * TODO: Complain to gcc folks about this and upgrade compiler.
 513 */
 514static unsigned long fnb(const unsigned long *addr,
 515                unsigned long size, unsigned long offset)
 516{
 517        return find_next_bit(addr, size, offset);
 518}
 519
/*
 * Detach the logfs_block from @page and park it in the object alias tree.
 *
 * For every bit set in the block's alias_map the corresponding child
 * pointer is copied out of the page into a newly allocated alias item.  The
 * page then loses its private data and its uptodate state.  During
 * shutdown (LOGFS_SB_FLAG_SHUTDOWN) the block is simply freed instead.
 */
void move_page_to_btree(struct page *page)
{
        struct logfs_block *block = logfs_block(page);
        struct super_block *sb = block->sb;
        struct logfs_super *super = logfs_super(sb);
        struct object_alias_item *item;
        unsigned long pos;
        __be64 *child;
        int err;

        if (super->s_flags & LOGFS_SB_FLAG_SHUTDOWN) {
                block->ops->free_block(sb, block);
                return;
        }
        log_blockmove("move_page_to_btree(%llx, %llx, %x)\n",
                        block->ino, block->bix, block->level);
        super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS;

        /* Visit each set bit; pos++ steps past the bit just handled */
        for (pos = 0; ; pos++) {
                pos = fnb(block->alias_map, LOGFS_BLOCK_FACTOR, pos);
                if (pos >= LOGFS_BLOCK_FACTOR)
                        break;

                item = mempool_alloc(super->s_alias_pool, GFP_NOFS);
                BUG_ON(!item); /* mempool empty */
                memset(item, 0, sizeof(*item));

                /* Map the page only long enough to copy one child pointer */
                child = kmap_atomic(page);
                item->val = child[pos];
                kunmap_atomic(child);
                item->child_no = pos;
                list_add(&item->list, &block->item_list);
        }
        block->page = NULL;

        /* Undo the private-data attachment and drop its page reference */
        if (PagePrivate(page)) {
                ClearPagePrivate(page);
                put_page(page);
                set_page_private(page, 0);
        }
        block->ops = &btree_block_ops;
        err = alias_tree_insert(block->sb, block->ino, block->bix, block->level,
                        block);
        BUG_ON(err); /* mempool empty */
        ClearPageUptodate(page);
}
 566
 567static int __logfs_segment_read(struct inode *inode, void *buf,
 568                u64 ofs, u64 bix, level_t level)
 569{
 570        struct super_block *sb = inode->i_sb;
 571        void *compressor_buf = logfs_super(sb)->s_compressed_je;
 572        struct logfs_object_header oh;
 573        __be32 crc;
 574        u16 len;
 575        int err, block_len;
 576
 577        block_len = obj_len(sb, obj_type(inode, level));
 578        err = read_obj_header(sb, ofs, &oh);
 579        if (err)
 580                goto out_err;
 581
 582        err = -EIO;
 583        if (be64_to_cpu(oh.ino) != inode->i_ino
 584                        || check_pos(sb, be64_to_cpu(oh.bix), bix, level)) {
 585                printk(KERN_ERR"LOGFS: (ino, bix) don't match at %llx: "
 586                                "expected (%lx, %llx), got (%llx, %llx)\n",
 587                                ofs, inode->i_ino, bix,
 588                                be64_to_cpu(oh.ino), be64_to_cpu(oh.bix));
 589                goto out_err;
 590        }
 591
 592        len = be16_to_cpu(oh.len);
 593
 594        switch (oh.compr) {
 595        case COMPR_NONE:
 596                err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len, buf);
 597                if (err)
 598                        goto out_err;
 599                crc = logfs_crc32(buf, len, 0);
 600                if (crc != oh.data_crc) {
 601                        printk(KERN_ERR"LOGFS: uncompressed data crc error at "
 602                                        "%llx: expected %x, got %x\n", ofs,
 603                                        be32_to_cpu(oh.data_crc),
 604                                        be32_to_cpu(crc));
 605                        goto out_err;
 606                }
 607                break;
 608        case COMPR_ZLIB:
 609                mutex_lock(&logfs_super(sb)->s_journal_mutex);
 610                err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len,
 611                                compressor_buf);
 612                if (err) {
 613                        mutex_unlock(&logfs_super(sb)->s_journal_mutex);
 614                        goto out_err;
 615                }
 616                crc = logfs_crc32(compressor_buf, len, 0);
 617                if (crc != oh.data_crc) {
 618                        printk(KERN_ERR"LOGFS: compressed data crc error at "
 619                                        "%llx: expected %x, got %x\n", ofs,
 620                                        be32_to_cpu(oh.data_crc),
 621                                        be32_to_cpu(crc));
 622                        mutex_unlock(&logfs_super(sb)->s_journal_mutex);
 623                        goto out_err;
 624                }
 625                err = logfs_uncompress(compressor_buf, buf, len, block_len);
 626                mutex_unlock(&logfs_super(sb)->s_journal_mutex);
 627                if (err) {
 628                        printk(KERN_ERR"LOGFS: uncompress error at %llx\n", ofs);
 629                        goto out_err;
 630                }
 631                break;
 632        default:
 633                LOGFS_BUG(sb);
 634                err = -EIO;
 635                goto out_err;
 636        }
 637        return 0;
 638
 639out_err:
 640        logfs_set_ro(sb);
 641        printk(KERN_ERR"LOGFS: device is read-only now\n");
 642        LOGFS_BUG(sb);
 643        return err;
 644}
 645
 646/**
 647 * logfs_segment_read - read data block from object store
 648 * @inode:              inode containing data
 649 * @buf:                data buffer
 650 * @ofs:                physical data offset
 651 * @bix:                block index
 652 * @level:              block level
 653 *
 654 * Returns 0 on success or a negative errno.
 655 */
 656int logfs_segment_read(struct inode *inode, struct page *page,
 657                u64 ofs, u64 bix, level_t level)
 658{
 659        int err;
 660        void *buf;
 661
 662        if (PageUptodate(page))
 663                return 0;
 664
 665        ofs &= ~LOGFS_FULLY_POPULATED;
 666
 667        buf = kmap(page);
 668        err = __logfs_segment_read(inode, buf, ofs, bix, level);
 669        if (!err) {
 670                move_btree_to_page(inode, page, buf);
 671                SetPageUptodate(page);
 672        }
 673        kunmap(page);
 674        log_segment("logfs_segment_read(%lx, %llx, %x) %llx (%d)\n",
 675                        inode->i_ino, bix, level, ofs, err);
 676        return err;
 677}
 678
 679int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow)
 680{
 681        struct super_block *sb = inode->i_sb;
 682        struct logfs_super *super = logfs_super(sb);
 683        struct logfs_object_header h;
 684        u16 len;
 685        int err;
 686
 687        super->s_flags |= LOGFS_SB_FLAG_DIRTY;
 688        BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN);
 689        BUG_ON(shadow->old_ofs & LOGFS_FULLY_POPULATED);
 690        if (!shadow->old_ofs)
 691                return 0;
 692
 693        log_segment("logfs_segment_delete(%llx, %llx, %x) %llx->%llx %x->%x\n",
 694                        shadow->ino, shadow->bix, shadow->gc_level,
 695                        shadow->old_ofs, shadow->new_ofs,
 696                        shadow->old_len, shadow->new_len);
 697        err = read_obj_header(sb, shadow->old_ofs, &h);
 698        LOGFS_BUG_ON(err, sb);
 699        LOGFS_BUG_ON(be64_to_cpu(h.ino) != inode->i_ino, sb);
 700        LOGFS_BUG_ON(check_pos(sb, shadow->bix, be64_to_cpu(h.bix),
 701                                shrink_level(shadow->gc_level)), sb);
 702
 703        if (shadow->gc_level == 0)
 704                len = be16_to_cpu(h.len);
 705        else
 706                len = obj_len(sb, h.type);
 707        shadow->old_len = len + sizeof(h);
 708        return 0;
 709}
 710
 711void freeseg(struct super_block *sb, u32 segno)
 712{
 713        struct logfs_super *super = logfs_super(sb);
 714        struct address_space *mapping = super->s_mapping_inode->i_mapping;
 715        struct page *page;
 716        u64 ofs, start, end;
 717
 718        start = dev_ofs(sb, segno, 0);
 719        end = dev_ofs(sb, segno + 1, 0);
 720        for (ofs = start; ofs < end; ofs += PAGE_SIZE) {
 721                page = find_get_page(mapping, ofs >> PAGE_SHIFT);
 722                if (!page)
 723                        continue;
 724                if (PagePrivate(page)) {
 725                        ClearPagePrivate(page);
 726                        put_page(page);
 727                }
 728                put_page(page);
 729        }
 730}
 731
/*
 * Make sure @area has an open segment with room for @bytes more bytes.
 *
 * If the area is open and the request fits, nothing happens.  Otherwise an
 * open segment is closed first: its remainder is padded, everything not yet
 * written is handed to the device's writeseg operation, and its cached
 * pages are released.  A new segment is then picked and erased; segments
 * that fail to erase are marked bad and another one is tried.
 *
 * Returns 1 if an open segment had to be closed, 0 otherwise.
 */
int logfs_open_area(struct logfs_area *area, size_t bytes)
{
        struct super_block *sb = area->a_sb;
        struct logfs_super *super = logfs_super(sb);
        int err, closed = 0;

        if (area->a_is_open && area->a_used_bytes + bytes <= super->s_segsize)
                return 0;

        if (area->a_is_open) {
                /* Write everything from the last written byte to segment end */
                u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
                u32 len = super->s_segsize - area->a_written_bytes;

                log_gc("logfs_close_area(%x)\n", area->a_segno);
                pad_wbuf(area, 1);
                super->s_devops->writeseg(area->a_sb, ofs, len);
                freeseg(sb, area->a_segno);
                closed = 1;
        }

        area->a_used_bytes = 0;
        area->a_written_bytes = 0;
again:
        area->a_ops->get_free_segment(area);
        area->a_ops->get_erase_count(area);

        log_gc("logfs_open_area(%x, %x)\n", area->a_segno, area->a_level);
        err = area->a_ops->erase_segment(area);
        if (err) {
                printk(KERN_WARNING "LogFS: Error erasing segment %x\n",
                                area->a_segno);
                /* NOTE(review): the result of marking the segment bad is ignored */
                logfs_mark_segment_bad(sb, area->a_segno);
                goto again;
        }
        area->a_is_open = 1;
        return closed;
}
 769
 770void logfs_sync_area(struct logfs_area *area)
 771{
 772        struct super_block *sb = area->a_sb;
 773        struct logfs_super *super = logfs_super(sb);
 774        u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
 775        u32 len = (area->a_used_bytes - area->a_written_bytes);
 776
 777        if (super->s_writesize)
 778                len &= ~(super->s_writesize - 1);
 779        if (len == 0)
 780                return;
 781        pad_wbuf(area, 0);
 782        super->s_devops->writeseg(sb, ofs, len);
 783        area->a_written_bytes += len;
 784}
 785
 786void logfs_sync_segments(struct super_block *sb)
 787{
 788        struct logfs_super *super = logfs_super(sb);
 789        int i;
 790
 791        for_each_area(i)
 792                logfs_sync_area(super->s_area[i]);
 793}
 794
 795/*
 796 * Pick a free segment to be used for this area.  Effectively takes a
 797 * candidate from the free list (not really a candidate anymore).
 798 */
 799static void ostore_get_free_segment(struct logfs_area *area)
 800{
 801        struct super_block *sb = area->a_sb;
 802        struct logfs_super *super = logfs_super(sb);
 803
 804        if (super->s_free_list.count == 0) {
 805                printk(KERN_ERR"LOGFS: ran out of free segments\n");
 806                LOGFS_BUG(sb);
 807        }
 808
 809        area->a_segno = get_best_cand(sb, &super->s_free_list, NULL);
 810}
 811
 812static void ostore_get_erase_count(struct logfs_area *area)
 813{
 814        struct logfs_segment_entry se;
 815        u32 ec_level;
 816
 817        logfs_get_segment_entry(area->a_sb, area->a_segno, &se);
 818        BUG_ON(se.ec_level == cpu_to_be32(BADSEG) ||
 819                        se.valid == cpu_to_be32(RESERVED));
 820
 821        ec_level = be32_to_cpu(se.ec_level);
 822        area->a_erase_count = (ec_level >> 4) + 1;
 823}
 824
 825static int ostore_erase_segment(struct logfs_area *area)
 826{
 827        struct super_block *sb = area->a_sb;
 828        struct logfs_segment_header sh;
 829        u64 ofs;
 830        int err;
 831
 832        err = logfs_erase_segment(sb, area->a_segno, 0);
 833        if (err)
 834                return err;
 835
 836        sh.pad = 0;
 837        sh.type = SEG_OSTORE;
 838        sh.level = (__force u8)area->a_level;
 839        sh.segno = cpu_to_be32(area->a_segno);
 840        sh.ec = cpu_to_be32(area->a_erase_count);
 841        sh.gec = cpu_to_be64(logfs_super(sb)->s_gec);
 842        sh.crc = logfs_crc32(&sh, sizeof(sh), 4);
 843
 844        logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count,
 845                        area->a_level);
 846
 847        ofs = dev_ofs(sb, area->a_segno, 0);
 848        area->a_used_bytes = sizeof(sh);
 849        logfs_buf_write(area, ofs, &sh, sizeof(sh));
 850        return 0;
 851}
 852
/*
 * Area operations for object store areas; assigned to every s_area[] entry
 * in logfs_init_areas() and invoked from logfs_open_area().
 */
static const struct logfs_area_ops ostore_area_ops = {
        .get_free_segment       = ostore_get_free_segment,
        .get_erase_count        = ostore_get_erase_count,
        .erase_segment          = ostore_erase_segment,
};
 858
 859static void free_area(struct logfs_area *area)
 860{
 861        if (area)
 862                freeseg(area->a_sb, area->a_segno);
 863        kfree(area);
 864}
 865
 866void free_areas(struct super_block *sb)
 867{
 868        struct logfs_super *super = logfs_super(sb);
 869        int i;
 870
 871        for_each_area(i)
 872                free_area(super->s_area[i]);
 873        free_area(super->s_journal_area);
 874}
 875
 876static struct logfs_area *alloc_area(struct super_block *sb)
 877{
 878        struct logfs_area *area;
 879
 880        area = kzalloc(sizeof(*area), GFP_KERNEL);
 881        if (!area)
 882                return NULL;
 883
 884        area->a_sb = sb;
 885        return area;
 886}
 887
 888static void map_invalidatepage(struct page *page, unsigned int o,
 889                               unsigned int l)
 890{
 891        return;
 892}
 893
 894static int map_releasepage(struct page *page, gfp_t g)
 895{
 896        /* Don't release these pages */
 897        return 0;
 898}
 899
/*
 * Address space operations installed on the mapping inode by
 * logfs_init_mapping().  The invalidate and release hooks are stubs that
 * leave the pages alone.
 */
static const struct address_space_operations mapping_aops = {
        .invalidatepage = map_invalidatepage,
        .releasepage    = map_releasepage,
        .set_page_dirty = __set_page_dirty_nobuffers,
};
 905
 906int logfs_init_mapping(struct super_block *sb)
 907{
 908        struct logfs_super *super = logfs_super(sb);
 909        struct address_space *mapping;
 910        struct inode *inode;
 911
 912        inode = logfs_new_meta_inode(sb, LOGFS_INO_MAPPING);
 913        if (IS_ERR(inode))
 914                return PTR_ERR(inode);
 915        super->s_mapping_inode = inode;
 916        mapping = inode->i_mapping;
 917        mapping->a_ops = &mapping_aops;
 918        /* Would it be possible to use __GFP_HIGHMEM as well? */
 919        mapping_set_gfp_mask(mapping, GFP_NOFS);
 920        return 0;
 921}
 922
/*
 * Allocate the alias item mempool, the journal area and one object store
 * area per GC level, then initialise the object alias tree on top of the
 * superblock's btree mempool.
 *
 * Returns 0 on success or -ENOMEM; on failure everything allocated here is
 * released again.
 */
int logfs_init_areas(struct super_block *sb)
{
        struct logfs_super *super = logfs_super(sb);
        /* -1 so the unwind loop frees nothing if the journal area fails */
        int i = -1;

        super->s_alias_pool = mempool_create_kmalloc_pool(600,
                        sizeof(struct object_alias_item));
        if (!super->s_alias_pool)
                return -ENOMEM;

        super->s_journal_area = alloc_area(sb);
        if (!super->s_journal_area)
                goto err;

        for_each_area(i) {
                super->s_area[i] = alloc_area(sb);
                if (!super->s_area[i])
                        goto err;
                super->s_area[i]->a_level = GC_LEVEL(i);
                super->s_area[i]->a_ops = &ostore_area_ops;
        }
        btree_init_mempool128(&super->s_object_alias_tree,
                        super->s_btree_pool);
        return 0;

err:
        /* Free only the areas allocated before the one that failed */
        for (i--; i >= 0; i--)
                free_area(super->s_area[i]);
        free_area(super->s_journal_area);
        logfs_mempool_destroy(super->s_alias_pool);
        return -ENOMEM;
}
 955
 956void logfs_cleanup_areas(struct super_block *sb)
 957{
 958        struct logfs_super *super = logfs_super(sb);
 959
 960        btree_grim_visitor128(&super->s_object_alias_tree, 0, kill_alias);
 961}
 962