linux/fs/logfs/segment.c
<<
>>
Prefs
   1/*
   2 * fs/logfs/segment.c   - Handling the Object Store
   3 *
   4 * As should be obvious for Linux kernel code, license is GPLv2
   5 *
   6 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
   7 *
   8 * Object store or ostore makes up the complete device with exception of
   9 * the superblock and journal areas.  Apart from its own metadata it stores
  10 * three kinds of objects: inodes, dentries and blocks, both data and indirect.
  11 */
  12#include "logfs.h"
  13#include <linux/slab.h>
  14
  15static int logfs_mark_segment_bad(struct super_block *sb, u32 segno)
  16{
  17        struct logfs_super *super = logfs_super(sb);
  18        struct btree_head32 *head = &super->s_reserved_segments;
  19        int err;
  20
  21        err = btree_insert32(head, segno, (void *)1, GFP_NOFS);
  22        if (err)
  23                return err;
  24        logfs_super(sb)->s_bad_segments++;
  25        /* FIXME: write to journal */
  26        return 0;
  27}
  28
  29int logfs_erase_segment(struct super_block *sb, u32 segno, int ensure_erase)
  30{
  31        struct logfs_super *super = logfs_super(sb);
  32
  33        super->s_gec++;
  34
  35        return super->s_devops->erase(sb, (u64)segno << super->s_segshift,
  36                        super->s_segsize, ensure_erase);
  37}
  38
  39static s64 logfs_get_free_bytes(struct logfs_area *area, size_t bytes)
  40{
  41        s32 ofs;
  42
  43        logfs_open_area(area, bytes);
  44
  45        ofs = area->a_used_bytes;
  46        area->a_used_bytes += bytes;
  47        BUG_ON(area->a_used_bytes >= logfs_super(area->a_sb)->s_segsize);
  48
  49        return dev_ofs(area->a_sb, area->a_segno, ofs);
  50}
  51
  52static struct page *get_mapping_page(struct super_block *sb, pgoff_t index,
  53                int use_filler)
  54{
  55        struct logfs_super *super = logfs_super(sb);
  56        struct address_space *mapping = super->s_mapping_inode->i_mapping;
  57        filler_t *filler = super->s_devops->readpage;
  58        struct page *page;
  59
  60        BUG_ON(mapping_gfp_mask(mapping) & __GFP_FS);
  61        if (use_filler)
  62                page = read_cache_page(mapping, index, filler, sb);
  63        else {
  64                page = find_or_create_page(mapping, index, GFP_NOFS);
  65                unlock_page(page);
  66        }
  67        return page;
  68}
  69
  70int __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
  71                int use_filler)
  72{
  73        pgoff_t index = ofs >> PAGE_SHIFT;
  74        struct page *page;
  75        long offset = ofs & (PAGE_SIZE-1);
  76        long copylen;
  77
  78        /* Only logfs_wbuf_recover may use len==0 */
  79        BUG_ON(!len && !use_filler);
  80        do {
  81                copylen = min((ulong)len, PAGE_SIZE - offset);
  82
  83                page = get_mapping_page(area->a_sb, index, use_filler);
  84                if (IS_ERR(page))
  85                        return PTR_ERR(page);
  86                BUG_ON(!page); /* FIXME: reserve a pool */
  87                SetPageUptodate(page);
  88                memcpy(page_address(page) + offset, buf, copylen);
  89
  90                if (!PagePrivate(page)) {
  91                        SetPagePrivate(page);
  92                        page_cache_get(page);
  93                }
  94                page_cache_release(page);
  95
  96                buf += copylen;
  97                len -= copylen;
  98                offset = 0;
  99                index++;
 100        } while (len);
 101        return 0;
 102}
 103
 104static void pad_partial_page(struct logfs_area *area)
 105{
 106        struct super_block *sb = area->a_sb;
 107        struct page *page;
 108        u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
 109        pgoff_t index = ofs >> PAGE_SHIFT;
 110        long offset = ofs & (PAGE_SIZE-1);
 111        u32 len = PAGE_SIZE - offset;
 112
 113        if (len % PAGE_SIZE) {
 114                page = get_mapping_page(sb, index, 0);
 115                BUG_ON(!page); /* FIXME: reserve a pool */
 116                memset(page_address(page) + offset, 0xff, len);
 117                if (!PagePrivate(page)) {
 118                        SetPagePrivate(page);
 119                        page_cache_get(page);
 120                }
 121                page_cache_release(page);
 122        }
 123}
 124
 125static void pad_full_pages(struct logfs_area *area)
 126{
 127        struct super_block *sb = area->a_sb;
 128        struct logfs_super *super = logfs_super(sb);
 129        u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
 130        u32 len = super->s_segsize - area->a_used_bytes;
 131        pgoff_t index = PAGE_CACHE_ALIGN(ofs) >> PAGE_CACHE_SHIFT;
 132        pgoff_t no_indizes = len >> PAGE_CACHE_SHIFT;
 133        struct page *page;
 134
 135        while (no_indizes) {
 136                page = get_mapping_page(sb, index, 0);
 137                BUG_ON(!page); /* FIXME: reserve a pool */
 138                SetPageUptodate(page);
 139                memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
 140                if (!PagePrivate(page)) {
 141                        SetPagePrivate(page);
 142                        page_cache_get(page);
 143                }
 144                page_cache_release(page);
 145                index++;
 146                no_indizes--;
 147        }
 148}
 149
 /*
  * bdev_writeseg will write full pages, so the tail of the current page is
  * memset to prevent data leaks.  For the final writeout of a segment
  * (@final set) all remaining pages are allocated and memset as well.
  */
 static void pad_wbuf(struct logfs_area *area, int final)
 {
         pad_partial_page(area);
         if (!final)
                 return;
         pad_full_pages(area);
 }
 160
 161/*
 162 * We have to be careful with the alias tree.  Since lookup is done by bix,
 163 * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with
 164 * indirect blocks.  So always use it through accessor functions.
 165 */
 166static void *alias_tree_lookup(struct super_block *sb, u64 ino, u64 bix,
 167                level_t level)
 168{
 169        struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree;
 170        pgoff_t index = logfs_pack_index(bix, level);
 171
 172        return btree_lookup128(head, ino, index);
 173}
 174
 175static int alias_tree_insert(struct super_block *sb, u64 ino, u64 bix,
 176                level_t level, void *val)
 177{
 178        struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree;
 179        pgoff_t index = logfs_pack_index(bix, level);
 180
 181        return btree_insert128(head, ino, index, val, GFP_NOFS);
 182}
 183
 184static int btree_write_alias(struct super_block *sb, struct logfs_block *block,
 185                write_alias_t *write_one_alias)
 186{
 187        struct object_alias_item *item;
 188        int err;
 189
 190        list_for_each_entry(item, &block->item_list, list) {
 191                err = write_alias_journal(sb, block->ino, block->bix,
 192                                block->level, item->child_no, item->val);
 193                if (err)
 194                        return err;
 195        }
 196        return 0;
 197}
 198
 199static struct logfs_block_ops btree_block_ops = {
 200        .write_block    = btree_write_block,
 201        .free_block     = __free_block,
 202        .write_alias    = btree_write_alias,
 203};
 204
 205int logfs_load_object_aliases(struct super_block *sb,
 206                struct logfs_obj_alias *oa, int count)
 207{
 208        struct logfs_super *super = logfs_super(sb);
 209        struct logfs_block *block;
 210        struct object_alias_item *item;
 211        u64 ino, bix;
 212        level_t level;
 213        int i, err;
 214
 215        super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS;
 216        count /= sizeof(*oa);
 217        for (i = 0; i < count; i++) {
 218                item = mempool_alloc(super->s_alias_pool, GFP_NOFS);
 219                if (!item)
 220                        return -ENOMEM;
 221                memset(item, 0, sizeof(*item));
 222
 223                super->s_no_object_aliases++;
 224                item->val = oa[i].val;
 225                item->child_no = be16_to_cpu(oa[i].child_no);
 226
 227                ino = be64_to_cpu(oa[i].ino);
 228                bix = be64_to_cpu(oa[i].bix);
 229                level = LEVEL(oa[i].level);
 230
 231                log_aliases("logfs_load_object_aliases(%llx, %llx, %x, %x) %llx\n",
 232                                ino, bix, level, item->child_no,
 233                                be64_to_cpu(item->val));
 234                block = alias_tree_lookup(sb, ino, bix, level);
 235                if (!block) {
 236                        block = __alloc_block(sb, ino, bix, level);
 237                        block->ops = &btree_block_ops;
 238                        err = alias_tree_insert(sb, ino, bix, level, block);
 239                        BUG_ON(err); /* mempool empty */
 240                }
 241                if (test_and_set_bit(item->child_no, block->alias_map)) {
 242                        printk(KERN_ERR"LogFS: Alias collision detected\n");
 243                        return -EIO;
 244                }
 245                list_move_tail(&block->alias_list, &super->s_object_alias);
 246                list_add(&item->list, &block->item_list);
 247        }
 248        return 0;
 249}
 250
 251static void kill_alias(void *_block, unsigned long ignore0,
 252                u64 ignore1, u64 ignore2, size_t ignore3)
 253{
 254        struct logfs_block *block = _block;
 255        struct super_block *sb = block->sb;
 256        struct logfs_super *super = logfs_super(sb);
 257        struct object_alias_item *item;
 258
 259        while (!list_empty(&block->item_list)) {
 260                item = list_entry(block->item_list.next, typeof(*item), list);
 261                list_del(&item->list);
 262                mempool_free(item, super->s_alias_pool);
 263        }
 264        block->ops->free_block(sb, block);
 265}
 266
 267static int obj_type(struct inode *inode, level_t level)
 268{
 269        if (level == 0) {
 270                if (S_ISDIR(inode->i_mode))
 271                        return OBJ_DENTRY;
 272                if (inode->i_ino == LOGFS_INO_MASTER)
 273                        return OBJ_INODE;
 274        }
 275        return OBJ_BLOCK;
 276}
 277
 278static int obj_len(struct super_block *sb, int obj_type)
 279{
 280        switch (obj_type) {
 281        case OBJ_DENTRY:
 282                return sizeof(struct logfs_disk_dentry);
 283        case OBJ_INODE:
 284                return sizeof(struct logfs_disk_inode);
 285        case OBJ_BLOCK:
 286                return sb->s_blocksize;
 287        default:
 288                BUG();
 289        }
 290}
 291
 292static int __logfs_segment_write(struct inode *inode, void *buf,
 293                struct logfs_shadow *shadow, int type, int len, int compr)
 294{
 295        struct logfs_area *area;
 296        struct super_block *sb = inode->i_sb;
 297        s64 ofs;
 298        struct logfs_object_header h;
 299        int acc_len;
 300
 301        if (shadow->gc_level == 0)
 302                acc_len = len;
 303        else
 304                acc_len = obj_len(sb, type);
 305
 306        area = get_area(sb, shadow->gc_level);
 307        ofs = logfs_get_free_bytes(area, len + LOGFS_OBJECT_HEADERSIZE);
 308        LOGFS_BUG_ON(ofs <= 0, sb);
 309        /*
 310         * Order is important.  logfs_get_free_bytes(), by modifying the
 311         * segment file, may modify the content of the very page we're about
 312         * to write now.  Which is fine, as long as the calculated crc and
 313         * written data still match.  So do the modifications _before_
 314         * calculating the crc.
 315         */
 316
 317        h.len   = cpu_to_be16(len);
 318        h.type  = type;
 319        h.compr = compr;
 320        h.ino   = cpu_to_be64(inode->i_ino);
 321        h.bix   = cpu_to_be64(shadow->bix);
 322        h.crc   = logfs_crc32(&h, sizeof(h) - 4, 4);
 323        h.data_crc = logfs_crc32(buf, len, 0);
 324
 325        logfs_buf_write(area, ofs, &h, sizeof(h));
 326        logfs_buf_write(area, ofs + LOGFS_OBJECT_HEADERSIZE, buf, len);
 327
 328        shadow->new_ofs = ofs;
 329        shadow->new_len = acc_len + LOGFS_OBJECT_HEADERSIZE;
 330
 331        return 0;
 332}
 333
 334static s64 logfs_segment_write_compress(struct inode *inode, void *buf,
 335                struct logfs_shadow *shadow, int type, int len)
 336{
 337        struct super_block *sb = inode->i_sb;
 338        void *compressor_buf = logfs_super(sb)->s_compressed_je;
 339        ssize_t compr_len;
 340        int ret;
 341
 342        mutex_lock(&logfs_super(sb)->s_journal_mutex);
 343        compr_len = logfs_compress(buf, compressor_buf, len, len);
 344
 345        if (compr_len >= 0) {
 346                ret = __logfs_segment_write(inode, compressor_buf, shadow,
 347                                type, compr_len, COMPR_ZLIB);
 348        } else {
 349                ret = __logfs_segment_write(inode, buf, shadow, type, len,
 350                                COMPR_NONE);
 351        }
 352        mutex_unlock(&logfs_super(sb)->s_journal_mutex);
 353        return ret;
 354}
 355
 356/**
 357 * logfs_segment_write - write data block to object store
 358 * @inode:              inode containing data
 359 *
 360 * Returns an errno or zero.
 361 */
 362int logfs_segment_write(struct inode *inode, struct page *page,
 363                struct logfs_shadow *shadow)
 364{
 365        struct super_block *sb = inode->i_sb;
 366        struct logfs_super *super = logfs_super(sb);
 367        int do_compress, type, len;
 368        int ret;
 369        void *buf;
 370
 371        super->s_flags |= LOGFS_SB_FLAG_DIRTY;
 372        BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN);
 373        do_compress = logfs_inode(inode)->li_flags & LOGFS_IF_COMPRESSED;
 374        if (shadow->gc_level != 0) {
 375                /* temporarily disable compression for indirect blocks */
 376                do_compress = 0;
 377        }
 378
 379        type = obj_type(inode, shrink_level(shadow->gc_level));
 380        len = obj_len(sb, type);
 381        buf = kmap(page);
 382        if (do_compress)
 383                ret = logfs_segment_write_compress(inode, buf, shadow, type,
 384                                len);
 385        else
 386                ret = __logfs_segment_write(inode, buf, shadow, type, len,
 387                                COMPR_NONE);
 388        kunmap(page);
 389
 390        log_segment("logfs_segment_write(%llx, %llx, %x) %llx->%llx %x->%x\n",
 391                        shadow->ino, shadow->bix, shadow->gc_level,
 392                        shadow->old_ofs, shadow->new_ofs,
 393                        shadow->old_len, shadow->new_len);
 394        /* this BUG_ON did catch a locking bug.  useful */
 395        BUG_ON(!(shadow->new_ofs & (super->s_segsize - 1)));
 396        return ret;
 397}
 398
 399int wbuf_read(struct super_block *sb, u64 ofs, size_t len, void *buf)
 400{
 401        pgoff_t index = ofs >> PAGE_SHIFT;
 402        struct page *page;
 403        long offset = ofs & (PAGE_SIZE-1);
 404        long copylen;
 405
 406        while (len) {
 407                copylen = min((ulong)len, PAGE_SIZE - offset);
 408
 409                page = get_mapping_page(sb, index, 1);
 410                if (IS_ERR(page))
 411                        return PTR_ERR(page);
 412                memcpy(buf, page_address(page) + offset, copylen);
 413                page_cache_release(page);
 414
 415                buf += copylen;
 416                len -= copylen;
 417                offset = 0;
 418                index++;
 419        }
 420        return 0;
 421}
 422
 423/*
 424 * The "position" of indirect blocks is ambiguous.  It can be the position
 425 * of any data block somewhere behind this indirect block.  So we need to
 426 * normalize the positions through logfs_block_mask() before comparing.
 427 */
 428static int check_pos(struct super_block *sb, u64 pos1, u64 pos2, level_t level)
 429{
 430        return  (pos1 & logfs_block_mask(sb, level)) !=
 431                (pos2 & logfs_block_mask(sb, level));
 432}
 433
 #if 0
 /*
  * Currently compiled out.  Reads the segment header at @ofs into @sh and
  * verifies its crc; returns 0, a read error, or -EIO on crc mismatch.
  */
 static int read_seg_header(struct super_block *sb, u64 ofs,
                 struct logfs_segment_header *sh)
 {
         __be32 crc;
         int err = wbuf_read(sb, ofs, sizeof(*sh), sh);

         if (err)
                 return err;

         crc = logfs_crc32(sh, sizeof(*sh), 4);
         if (crc == sh->crc)
                 return 0;

         printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, "
                         "got %x\n", ofs, be32_to_cpu(sh->crc),
                         be32_to_cpu(crc));
         return -EIO;
 }
 #endif
 454
 455static int read_obj_header(struct super_block *sb, u64 ofs,
 456                struct logfs_object_header *oh)
 457{
 458        __be32 crc;
 459        int err;
 460
 461        err = wbuf_read(sb, ofs, sizeof(*oh), oh);
 462        if (err)
 463                return err;
 464        crc = logfs_crc32(oh, sizeof(*oh) - 4, 4);
 465        if (crc != oh->crc) {
 466                printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, "
 467                                "got %x\n", ofs, be32_to_cpu(oh->crc),
 468                                be32_to_cpu(crc));
 469                return -EIO;
 470        }
 471        return 0;
 472}
 473
 474static void move_btree_to_page(struct inode *inode, struct page *page,
 475                __be64 *data)
 476{
 477        struct super_block *sb = inode->i_sb;
 478        struct logfs_super *super = logfs_super(sb);
 479        struct btree_head128 *head = &super->s_object_alias_tree;
 480        struct logfs_block *block;
 481        struct object_alias_item *item, *next;
 482
 483        if (!(super->s_flags & LOGFS_SB_FLAG_OBJ_ALIAS))
 484                return;
 485
 486        block = btree_remove128(head, inode->i_ino, page->index);
 487        if (!block)
 488                return;
 489
 490        log_blockmove("move_btree_to_page(%llx, %llx, %x)\n",
 491                        block->ino, block->bix, block->level);
 492        list_for_each_entry_safe(item, next, &block->item_list, list) {
 493                data[item->child_no] = item->val;
 494                list_del(&item->list);
 495                mempool_free(item, super->s_alias_pool);
 496        }
 497        block->page = page;
 498
 499        if (!PagePrivate(page)) {
 500                SetPagePrivate(page);
 501                page_cache_get(page);
 502                set_page_private(page, (unsigned long) block);
 503        }
 504        block->ops = &indirect_block_ops;
 505        initialize_block_counters(page, block, data, 0);
 506}
 507
 /*
  * Thin wrapper around find_next_bit().  It exists only to silence a false,
  * yet annoying gcc warning that otherwise makes the editor jump into
  * bitops.h on every recompile of this file.
  * TODO: Complain to gcc folks about this and upgrade compiler.
  */
 static unsigned long fnb(const unsigned long *addr,
                 unsigned long size, unsigned long offset)
 {
         unsigned long bit = find_next_bit(addr, size, offset);

         return bit;
 }
 518
 519void move_page_to_btree(struct page *page)
 520{
 521        struct logfs_block *block = logfs_block(page);
 522        struct super_block *sb = block->sb;
 523        struct logfs_super *super = logfs_super(sb);
 524        struct object_alias_item *item;
 525        unsigned long pos;
 526        __be64 *child;
 527        int err;
 528
 529        if (super->s_flags & LOGFS_SB_FLAG_SHUTDOWN) {
 530                block->ops->free_block(sb, block);
 531                return;
 532        }
 533        log_blockmove("move_page_to_btree(%llx, %llx, %x)\n",
 534                        block->ino, block->bix, block->level);
 535        super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS;
 536
 537        for (pos = 0; ; pos++) {
 538                pos = fnb(block->alias_map, LOGFS_BLOCK_FACTOR, pos);
 539                if (pos >= LOGFS_BLOCK_FACTOR)
 540                        break;
 541
 542                item = mempool_alloc(super->s_alias_pool, GFP_NOFS);
 543                BUG_ON(!item); /* mempool empty */
 544                memset(item, 0, sizeof(*item));
 545
 546                child = kmap_atomic(page);
 547                item->val = child[pos];
 548                kunmap_atomic(child);
 549                item->child_no = pos;
 550                list_add(&item->list, &block->item_list);
 551        }
 552        block->page = NULL;
 553
 554        if (PagePrivate(page)) {
 555                ClearPagePrivate(page);
 556                page_cache_release(page);
 557                set_page_private(page, 0);
 558        }
 559        block->ops = &btree_block_ops;
 560        err = alias_tree_insert(block->sb, block->ino, block->bix, block->level,
 561                        block);
 562        BUG_ON(err); /* mempool empty */
 563        ClearPageUptodate(page);
 564}
 565
 566static int __logfs_segment_read(struct inode *inode, void *buf,
 567                u64 ofs, u64 bix, level_t level)
 568{
 569        struct super_block *sb = inode->i_sb;
 570        void *compressor_buf = logfs_super(sb)->s_compressed_je;
 571        struct logfs_object_header oh;
 572        __be32 crc;
 573        u16 len;
 574        int err, block_len;
 575
 576        block_len = obj_len(sb, obj_type(inode, level));
 577        err = read_obj_header(sb, ofs, &oh);
 578        if (err)
 579                goto out_err;
 580
 581        err = -EIO;
 582        if (be64_to_cpu(oh.ino) != inode->i_ino
 583                        || check_pos(sb, be64_to_cpu(oh.bix), bix, level)) {
 584                printk(KERN_ERR"LOGFS: (ino, bix) don't match at %llx: "
 585                                "expected (%lx, %llx), got (%llx, %llx)\n",
 586                                ofs, inode->i_ino, bix,
 587                                be64_to_cpu(oh.ino), be64_to_cpu(oh.bix));
 588                goto out_err;
 589        }
 590
 591        len = be16_to_cpu(oh.len);
 592
 593        switch (oh.compr) {
 594        case COMPR_NONE:
 595                err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len, buf);
 596                if (err)
 597                        goto out_err;
 598                crc = logfs_crc32(buf, len, 0);
 599                if (crc != oh.data_crc) {
 600                        printk(KERN_ERR"LOGFS: uncompressed data crc error at "
 601                                        "%llx: expected %x, got %x\n", ofs,
 602                                        be32_to_cpu(oh.data_crc),
 603                                        be32_to_cpu(crc));
 604                        goto out_err;
 605                }
 606                break;
 607        case COMPR_ZLIB:
 608                mutex_lock(&logfs_super(sb)->s_journal_mutex);
 609                err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len,
 610                                compressor_buf);
 611                if (err) {
 612                        mutex_unlock(&logfs_super(sb)->s_journal_mutex);
 613                        goto out_err;
 614                }
 615                crc = logfs_crc32(compressor_buf, len, 0);
 616                if (crc != oh.data_crc) {
 617                        printk(KERN_ERR"LOGFS: compressed data crc error at "
 618                                        "%llx: expected %x, got %x\n", ofs,
 619                                        be32_to_cpu(oh.data_crc),
 620                                        be32_to_cpu(crc));
 621                        mutex_unlock(&logfs_super(sb)->s_journal_mutex);
 622                        goto out_err;
 623                }
 624                err = logfs_uncompress(compressor_buf, buf, len, block_len);
 625                mutex_unlock(&logfs_super(sb)->s_journal_mutex);
 626                if (err) {
 627                        printk(KERN_ERR"LOGFS: uncompress error at %llx\n", ofs);
 628                        goto out_err;
 629                }
 630                break;
 631        default:
 632                LOGFS_BUG(sb);
 633                err = -EIO;
 634                goto out_err;
 635        }
 636        return 0;
 637
 638out_err:
 639        logfs_set_ro(sb);
 640        printk(KERN_ERR"LOGFS: device is read-only now\n");
 641        LOGFS_BUG(sb);
 642        return err;
 643}
 644
 645/**
 646 * logfs_segment_read - read data block from object store
 647 * @inode:              inode containing data
 648 * @buf:                data buffer
 649 * @ofs:                physical data offset
 650 * @bix:                block index
 651 * @level:              block level
 652 *
 653 * Returns 0 on success or a negative errno.
 654 */
 655int logfs_segment_read(struct inode *inode, struct page *page,
 656                u64 ofs, u64 bix, level_t level)
 657{
 658        int err;
 659        void *buf;
 660
 661        if (PageUptodate(page))
 662                return 0;
 663
 664        ofs &= ~LOGFS_FULLY_POPULATED;
 665
 666        buf = kmap(page);
 667        err = __logfs_segment_read(inode, buf, ofs, bix, level);
 668        if (!err) {
 669                move_btree_to_page(inode, page, buf);
 670                SetPageUptodate(page);
 671        }
 672        kunmap(page);
 673        log_segment("logfs_segment_read(%lx, %llx, %x) %llx (%d)\n",
 674                        inode->i_ino, bix, level, ofs, err);
 675        return err;
 676}
 677
 678int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow)
 679{
 680        struct super_block *sb = inode->i_sb;
 681        struct logfs_super *super = logfs_super(sb);
 682        struct logfs_object_header h;
 683        u16 len;
 684        int err;
 685
 686        super->s_flags |= LOGFS_SB_FLAG_DIRTY;
 687        BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN);
 688        BUG_ON(shadow->old_ofs & LOGFS_FULLY_POPULATED);
 689        if (!shadow->old_ofs)
 690                return 0;
 691
 692        log_segment("logfs_segment_delete(%llx, %llx, %x) %llx->%llx %x->%x\n",
 693                        shadow->ino, shadow->bix, shadow->gc_level,
 694                        shadow->old_ofs, shadow->new_ofs,
 695                        shadow->old_len, shadow->new_len);
 696        err = read_obj_header(sb, shadow->old_ofs, &h);
 697        LOGFS_BUG_ON(err, sb);
 698        LOGFS_BUG_ON(be64_to_cpu(h.ino) != inode->i_ino, sb);
 699        LOGFS_BUG_ON(check_pos(sb, shadow->bix, be64_to_cpu(h.bix),
 700                                shrink_level(shadow->gc_level)), sb);
 701
 702        if (shadow->gc_level == 0)
 703                len = be16_to_cpu(h.len);
 704        else
 705                len = obj_len(sb, h.type);
 706        shadow->old_len = len + sizeof(h);
 707        return 0;
 708}
 709
 710void freeseg(struct super_block *sb, u32 segno)
 711{
 712        struct logfs_super *super = logfs_super(sb);
 713        struct address_space *mapping = super->s_mapping_inode->i_mapping;
 714        struct page *page;
 715        u64 ofs, start, end;
 716
 717        start = dev_ofs(sb, segno, 0);
 718        end = dev_ofs(sb, segno + 1, 0);
 719        for (ofs = start; ofs < end; ofs += PAGE_SIZE) {
 720                page = find_get_page(mapping, ofs >> PAGE_SHIFT);
 721                if (!page)
 722                        continue;
 723                if (PagePrivate(page)) {
 724                        ClearPagePrivate(page);
 725                        page_cache_release(page);
 726                }
 727                page_cache_release(page);
 728        }
 729}
 730
 731int logfs_open_area(struct logfs_area *area, size_t bytes)
 732{
 733        struct super_block *sb = area->a_sb;
 734        struct logfs_super *super = logfs_super(sb);
 735        int err, closed = 0;
 736
 737        if (area->a_is_open && area->a_used_bytes + bytes <= super->s_segsize)
 738                return 0;
 739
 740        if (area->a_is_open) {
 741                u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
 742                u32 len = super->s_segsize - area->a_written_bytes;
 743
 744                log_gc("logfs_close_area(%x)\n", area->a_segno);
 745                pad_wbuf(area, 1);
 746                super->s_devops->writeseg(area->a_sb, ofs, len);
 747                freeseg(sb, area->a_segno);
 748                closed = 1;
 749        }
 750
 751        area->a_used_bytes = 0;
 752        area->a_written_bytes = 0;
 753again:
 754        area->a_ops->get_free_segment(area);
 755        area->a_ops->get_erase_count(area);
 756
 757        log_gc("logfs_open_area(%x, %x)\n", area->a_segno, area->a_level);
 758        err = area->a_ops->erase_segment(area);
 759        if (err) {
 760                printk(KERN_WARNING "LogFS: Error erasing segment %x\n",
 761                                area->a_segno);
 762                logfs_mark_segment_bad(sb, area->a_segno);
 763                goto again;
 764        }
 765        area->a_is_open = 1;
 766        return closed;
 767}
 768
 769void logfs_sync_area(struct logfs_area *area)
 770{
 771        struct super_block *sb = area->a_sb;
 772        struct logfs_super *super = logfs_super(sb);
 773        u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
 774        u32 len = (area->a_used_bytes - area->a_written_bytes);
 775
 776        if (super->s_writesize)
 777                len &= ~(super->s_writesize - 1);
 778        if (len == 0)
 779                return;
 780        pad_wbuf(area, 0);
 781        super->s_devops->writeseg(sb, ofs, len);
 782        area->a_written_bytes += len;
 783}
 784
 785void logfs_sync_segments(struct super_block *sb)
 786{
 787        struct logfs_super *super = logfs_super(sb);
 788        int i;
 789
 790        for_each_area(i)
 791                logfs_sync_area(super->s_area[i]);
 792}
 793
 794/*
 795 * Pick a free segment to be used for this area.  Effectively takes a
 796 * candidate from the free list (not really a candidate anymore).
 797 */
 798static void ostore_get_free_segment(struct logfs_area *area)
 799{
 800        struct super_block *sb = area->a_sb;
 801        struct logfs_super *super = logfs_super(sb);
 802
 803        if (super->s_free_list.count == 0) {
 804                printk(KERN_ERR"LOGFS: ran out of free segments\n");
 805                LOGFS_BUG(sb);
 806        }
 807
 808        area->a_segno = get_best_cand(sb, &super->s_free_list, NULL);
 809}
 810
 811static void ostore_get_erase_count(struct logfs_area *area)
 812{
 813        struct logfs_segment_entry se;
 814        u32 ec_level;
 815
 816        logfs_get_segment_entry(area->a_sb, area->a_segno, &se);
 817        BUG_ON(se.ec_level == cpu_to_be32(BADSEG) ||
 818                        se.valid == cpu_to_be32(RESERVED));
 819
 820        ec_level = be32_to_cpu(se.ec_level);
 821        area->a_erase_count = (ec_level >> 4) + 1;
 822}
 823
 824static int ostore_erase_segment(struct logfs_area *area)
 825{
 826        struct super_block *sb = area->a_sb;
 827        struct logfs_segment_header sh;
 828        u64 ofs;
 829        int err;
 830
 831        err = logfs_erase_segment(sb, area->a_segno, 0);
 832        if (err)
 833                return err;
 834
 835        sh.pad = 0;
 836        sh.type = SEG_OSTORE;
 837        sh.level = (__force u8)area->a_level;
 838        sh.segno = cpu_to_be32(area->a_segno);
 839        sh.ec = cpu_to_be32(area->a_erase_count);
 840        sh.gec = cpu_to_be64(logfs_super(sb)->s_gec);
 841        sh.crc = logfs_crc32(&sh, sizeof(sh), 4);
 842
 843        logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count,
 844                        area->a_level);
 845
 846        ofs = dev_ofs(sb, area->a_segno, 0);
 847        area->a_used_bytes = sizeof(sh);
 848        logfs_buf_write(area, ofs, &sh, sizeof(sh));
 849        return 0;
 850}
 851
 852static const struct logfs_area_ops ostore_area_ops = {
 853        .get_free_segment       = ostore_get_free_segment,
 854        .get_erase_count        = ostore_get_erase_count,
 855        .erase_segment          = ostore_erase_segment,
 856};
 857
 858static void free_area(struct logfs_area *area)
 859{
 860        if (area)
 861                freeseg(area->a_sb, area->a_segno);
 862        kfree(area);
 863}
 864
 865void free_areas(struct super_block *sb)
 866{
 867        struct logfs_super *super = logfs_super(sb);
 868        int i;
 869
 870        for_each_area(i)
 871                free_area(super->s_area[i]);
 872        free_area(super->s_journal_area);
 873}
 874
 875static struct logfs_area *alloc_area(struct super_block *sb)
 876{
 877        struct logfs_area *area;
 878
 879        area = kzalloc(sizeof(*area), GFP_KERNEL);
 880        if (!area)
 881                return NULL;
 882
 883        area->a_sb = sb;
 884        return area;
 885}
 886
 /* invalidatepage hook for the mapping inode: intentionally does nothing. */
 static void map_invalidatepage(struct page *page, unsigned int o,
                                unsigned int l)
 {
 }
 892
 893static int map_releasepage(struct page *page, gfp_t g)
 894{
 895        /* Don't release these pages */
 896        return 0;
 897}
 898
 899static const struct address_space_operations mapping_aops = {
 900        .invalidatepage = map_invalidatepage,
 901        .releasepage    = map_releasepage,
 902        .set_page_dirty = __set_page_dirty_nobuffers,
 903};
 904
 905int logfs_init_mapping(struct super_block *sb)
 906{
 907        struct logfs_super *super = logfs_super(sb);
 908        struct address_space *mapping;
 909        struct inode *inode;
 910
 911        inode = logfs_new_meta_inode(sb, LOGFS_INO_MAPPING);
 912        if (IS_ERR(inode))
 913                return PTR_ERR(inode);
 914        super->s_mapping_inode = inode;
 915        mapping = inode->i_mapping;
 916        mapping->a_ops = &mapping_aops;
 917        /* Would it be possible to use __GFP_HIGHMEM as well? */
 918        mapping_set_gfp_mask(mapping, GFP_NOFS);
 919        return 0;
 920}
 921
 922int logfs_init_areas(struct super_block *sb)
 923{
 924        struct logfs_super *super = logfs_super(sb);
 925        int i = -1;
 926
 927        super->s_alias_pool = mempool_create_kmalloc_pool(600,
 928                        sizeof(struct object_alias_item));
 929        if (!super->s_alias_pool)
 930                return -ENOMEM;
 931
 932        super->s_journal_area = alloc_area(sb);
 933        if (!super->s_journal_area)
 934                goto err;
 935
 936        for_each_area(i) {
 937                super->s_area[i] = alloc_area(sb);
 938                if (!super->s_area[i])
 939                        goto err;
 940                super->s_area[i]->a_level = GC_LEVEL(i);
 941                super->s_area[i]->a_ops = &ostore_area_ops;
 942        }
 943        btree_init_mempool128(&super->s_object_alias_tree,
 944                        super->s_btree_pool);
 945        return 0;
 946
 947err:
 948        for (i--; i >= 0; i--)
 949                free_area(super->s_area[i]);
 950        free_area(super->s_journal_area);
 951        logfs_mempool_destroy(super->s_alias_pool);
 952        return -ENOMEM;
 953}
 954
 955void logfs_cleanup_areas(struct super_block *sb)
 956{
 957        struct logfs_super *super = logfs_super(sb);
 958
 959        btree_grim_visitor128(&super->s_object_alias_tree, 0, kill_alias);
 960}
 961