linux/fs/nilfs2/btnode.c
/*
 * btnode.c - NILFS B-tree node cache
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * This file was originally written by Seiji Kihara <kihara@osrg.net>
 * and fully revised by Ryusuke Konishi <ryusuke@osrg.net> for
 * stabilization and simplification.
 *
 */

#include <linux/types.h>
#include <linux/buffer_head.h>
#include <linux/mm.h>
#include <linux/backing-dev.h>
#include "nilfs.h"
#include "mdt.h"
#include "dat.h"
#include "page.h"
#include "btnode.h"

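/**
 * nilfs_btnode_cache_init_once - initialize a btnode cache structure
 * @btnc: btnode cache (an address_space embedded in an in-memory inode)
 *
 * Zero-fills @btnc and sets up its radix tree, tree lock, private buffer
 * list, and i_mmap fields so the mapping starts from a clean state.
 */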
void nilfs_btnode_cache_init_once(struct address_space *btnc)
{
        memset(btnc, 0, sizeof(*btnc));
        INIT_RADIX_TREE(&btnc->page_tree, GFP_ATOMIC);
        spin_lock_init(&btnc->tree_lock);
        INIT_LIST_HEAD(&btnc->private_list);
        spin_lock_init(&btnc->private_lock);

        spin_lock_init(&btnc->i_mmap_lock);
        INIT_RAW_PRIO_TREE_ROOT(&btnc->i_mmap);
        INIT_LIST_HEAD(&btnc->i_mmap_nonlinear);
}

static const struct address_space_operations def_btnode_aops = {
        .sync_page              = block_sync_page,
};

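/**
 * nilfs_btnode_cache_init - set up a btnode cache for use
 * @btnc: btnode cache
 * @bdi: backing device info of the device holding the B-tree nodes
 *
 * Attaches the backing device and the default address_space operations,
 * and restricts page allocations for this mapping to GFP_NOFS to avoid
 * recursing into the filesystem during memory reclaim.
 */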
void nilfs_btnode_cache_init(struct address_space *btnc,
                             struct backing_dev_info *bdi)
{
        btnc->host = NULL;  /* can this safely be set to the host inode? */
        btnc->flags = 0;
        mapping_set_gfp_mask(btnc, GFP_NOFS);
        btnc->assoc_mapping = NULL;
        btnc->backing_dev_info = bdi;
        btnc->a_ops = &def_btnode_aops;
}

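/**
 * nilfs_btnode_cache_clear - drop all pages from a btnode cache
 * @btnc: btnode cache
 *
 * Invalidates clean cached pages first, then truncates the whole mapping
 * so that no cached B-tree node pages remain.
 */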
void nilfs_btnode_cache_clear(struct address_space *btnc)
{
        invalidate_mapping_pages(btnc, 0, -1);
        truncate_inode_pages(btnc, 0);
}

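/**
 * nilfs_btnode_submit_block - submit a read request for a B-tree node block
 * @btnc: btnode cache
 * @blocknr: block number (key) of the node; a virtual block number unless
 *	@btnc belongs to the DAT inode
 * @pblocknr: physical block number to read from; if 0, @blocknr is used,
 *	translated through the DAT unless @btnc belongs to the DAT inode
 * @pbh: pointer to store the buffer head of the block
 * @newblk: nonzero to create a new, zero-filled block instead of reading
 *
 * Returns 0 when a read has been submitted, the internal code -EEXIST
 * when the buffer is already available (cache hit or newly created
 * block), or a negative error code on failure.
 */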
int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
                              sector_t pblocknr, struct buffer_head **pbh,
                              int newblk)
{
        struct buffer_head *bh;
        struct inode *inode = NILFS_BTNC_I(btnc);
        int err;

        bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node);
        if (unlikely(!bh))
                return -ENOMEM;

        err = -EEXIST; /* internal code */
        if (newblk) {
                if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) ||
                             buffer_dirty(bh))) {
                        brelse(bh);
                        BUG();
                }
                memset(bh->b_data, 0, 1 << inode->i_blkbits);
                bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
                bh->b_blocknr = blocknr;
                set_buffer_mapped(bh);
                set_buffer_uptodate(bh);
                goto found;
        }

        if (buffer_uptodate(bh) || buffer_dirty(bh))
                goto found;

        if (pblocknr == 0) {
                pblocknr = blocknr;
                if (inode->i_ino != NILFS_DAT_INO) {
                        struct inode *dat =
                                nilfs_dat_inode(NILFS_I_NILFS(inode));

                        /* blocknr is a virtual block number */
                        err = nilfs_dat_translate(dat, blocknr, &pblocknr);
                        if (unlikely(err)) {
                                brelse(bh);
                                goto out_locked;
                        }
                }
        }
        lock_buffer(bh);
        if (buffer_uptodate(bh)) {
                unlock_buffer(bh);
                err = -EEXIST; /* internal code */
                goto found;
        }
        set_buffer_mapped(bh);
        bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
        bh->b_blocknr = pblocknr; /* set block address for read */
        bh->b_end_io = end_buffer_read_sync;
        get_bh(bh);
        submit_bh(READ, bh);
        bh->b_blocknr = blocknr; /* set back to the given block address */
        err = 0;
found:
        *pbh = bh;

out_locked:
        unlock_page(bh->b_page);
        page_cache_release(bh->b_page);
        return err;
}

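/**
 * nilfs_btnode_get - get a B-tree node buffer, reading it if necessary
 * @btnc: btnode cache
 * @blocknr: block number (key) of the node
 * @pblocknr: physical block number, or 0 to look it up through the DAT
 * @pbh: pointer to store the buffer head of the block
 * @newblk: nonzero to create a new, zero-filled block
 *
 * Submits a read with nilfs_btnode_submit_block() and waits for it to
 * complete.  Returns 0 on success, -EIO when the read fails, or another
 * negative error code from the submission path.
 */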
int nilfs_btnode_get(struct address_space *btnc, __u64 blocknr,
                     sector_t pblocknr, struct buffer_head **pbh, int newblk)
{
        struct buffer_head *bh;
        int err;

        err = nilfs_btnode_submit_block(btnc, blocknr, pblocknr, pbh, newblk);
        if (err == -EEXIST) /* internal code (cache hit) */
                return 0;
        if (unlikely(err))
                return err;

        bh = *pbh;
        wait_on_buffer(bh);
        if (!buffer_uptodate(bh)) {
                brelse(bh);
                return -EIO;
        }
        return 0;
}

/**
 * nilfs_btnode_delete - delete B-tree node buffer
 * @bh: buffer to be deleted
 *
 * nilfs_btnode_delete() invalidates the specified buffer and deletes the
 * page containing it when the page is no longer busy.
 */
void nilfs_btnode_delete(struct buffer_head *bh)
{
        struct address_space *mapping;
        struct page *page = bh->b_page;
        pgoff_t index = page_index(page);
        int still_dirty;

        page_cache_get(page);
        lock_page(page);
        wait_on_page_writeback(page);

        nilfs_forget_buffer(bh);
        still_dirty = PageDirty(page);
        mapping = page->mapping;
        unlock_page(page);
        page_cache_release(page);

        if (!still_dirty && mapping)
                invalidate_inode_pages2_range(mapping, index, index);
}

/**
 * nilfs_btnode_prepare_change_key
 *  Prepare to move the contents of the block at the old key to the new
 *  key.  The old buffer is not removed, but may be reused as the new
 *  buffer.  This function may return -ENOMEM on memory allocation
 *  failure, or -EIO on disk read error.
 */
int nilfs_btnode_prepare_change_key(struct address_space *btnc,
                                    struct nilfs_btnode_chkey_ctxt *ctxt)
{
        struct buffer_head *obh, *nbh;
        struct inode *inode = NILFS_BTNC_I(btnc);
        __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;
        int err;

        if (oldkey == newkey)
                return 0;

        obh = ctxt->bh;
        ctxt->newbh = NULL;

        if (inode->i_blkbits == PAGE_CACHE_SHIFT) {
                lock_page(obh->b_page);
                /*
                 * We cannot call radix_tree_preload for the kernels older
                 * than 2.6.23, because it is not exported for modules.
                 */
retry:
                err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
                if (err)
                        goto failed_unlock;
                /* BUG_ON(oldkey != obh->b_page->index); */
                if (unlikely(oldkey != obh->b_page->index))
                        NILFS_PAGE_BUG(obh->b_page,
                                       "invalid oldkey %lld (newkey=%lld)",
                                       (unsigned long long)oldkey,
                                       (unsigned long long)newkey);

                spin_lock_irq(&btnc->tree_lock);
                err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page);
                spin_unlock_irq(&btnc->tree_lock);
                /*
                 * Note: page->index will not change to newkey until
                 * nilfs_btnode_commit_change_key() is called.
                 * To protect the page in this intermediate state, the
                 * page lock is held.
                 */
                radix_tree_preload_end();
                if (!err)
                        return 0;
                else if (err != -EEXIST)
                        goto failed_unlock;

                err = invalidate_inode_pages2_range(btnc, newkey, newkey);
                if (!err)
                        goto retry;
                /* fallback to copy mode */
                unlock_page(obh->b_page);
        }

        err = nilfs_btnode_get(btnc, newkey, 0, &nbh, 1);
        if (likely(!err)) {
                BUG_ON(nbh == obh);
                ctxt->newbh = nbh;
        }
        return err;

 failed_unlock:
        unlock_page(obh->b_page);
        return err;
}

/**
 * nilfs_btnode_commit_change_key
 *  Commit the change-key operation prepared by
 *  nilfs_btnode_prepare_change_key().
 */
void nilfs_btnode_commit_change_key(struct address_space *btnc,
                                    struct nilfs_btnode_chkey_ctxt *ctxt)
{
        struct buffer_head *obh = ctxt->bh, *nbh = ctxt->newbh;
        __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;
        struct page *opage;

        if (oldkey == newkey)
                return;

        if (nbh == NULL) {      /* blocksize == pagesize */
                opage = obh->b_page;
                if (unlikely(oldkey != opage->index))
                        NILFS_PAGE_BUG(opage,
                                       "invalid oldkey %lld (newkey=%lld)",
                                       (unsigned long long)oldkey,
                                       (unsigned long long)newkey);
                nilfs_btnode_mark_dirty(obh);

                spin_lock_irq(&btnc->tree_lock);
                radix_tree_delete(&btnc->page_tree, oldkey);
                radix_tree_tag_set(&btnc->page_tree, newkey,
                                   PAGECACHE_TAG_DIRTY);
                spin_unlock_irq(&btnc->tree_lock);

                opage->index = obh->b_blocknr = newkey;
                unlock_page(opage);
        } else {
                nilfs_copy_buffer(nbh, obh);
                nilfs_btnode_mark_dirty(nbh);

                nbh->b_blocknr = newkey;
                ctxt->bh = nbh;
                nilfs_btnode_delete(obh); /* will decrement bh->b_count */
        }
}

/**
 * nilfs_btnode_abort_change_key
 *  Abort the change-key operation prepared by
 *  nilfs_btnode_prepare_change_key().
 */
void nilfs_btnode_abort_change_key(struct address_space *btnc,
                                   struct nilfs_btnode_chkey_ctxt *ctxt)
{
        struct buffer_head *nbh = ctxt->newbh;
        __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;

        if (oldkey == newkey)
                return;

        if (nbh == NULL) {      /* blocksize == pagesize */
                spin_lock_irq(&btnc->tree_lock);
                radix_tree_delete(&btnc->page_tree, newkey);
                spin_unlock_irq(&btnc->tree_lock);
                unlock_page(ctxt->bh->b_page);
        } else
                brelse(nbh);
}