linux/fs/nilfs2/page.c
/*
 * page.c - buffer/page management specific to NILFS
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Written by Ryusuke Konishi <ryusuke@osrg.net>,
 *            Seiji Kihara <kihara@osrg.net>.
 */

#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/bitops.h>
#include <linux/page-flags.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/pagevec.h>
#include "nilfs.h"
#include "page.h"
#include "mdt.h"


#define NILFS_BUFFER_INHERENT_BITS  \
        ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \
         (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated))

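/*
 * __nilfs_get_page_block - get the buffer head for a block within a page
 *
 * Looks up the buffer head covering block @block on @page (whose page
 * index is @index), creating empty buffers with state @b_state first if
 * the page has none, and waits for any pending I/O on the buffer before
 * returning it.
 */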
static struct buffer_head *
__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
                       int blkbits, unsigned long b_state)
{
        unsigned long first_block;
        struct buffer_head *bh;

        if (!page_has_buffers(page))
                create_empty_buffers(page, 1 << blkbits, b_state);

        first_block = (unsigned long)index << (PAGE_CACHE_SHIFT - blkbits);
        bh = nilfs_page_get_nth_block(page, block - first_block);

        touch_buffer(bh);
        wait_on_buffer(bh);
        return bh;
}

/*
 * Since the page cache of B-tree node pages or data page cache of pseudo
 * inodes does not have a valid mapping->host pointer, calling
 * mark_buffer_dirty() for their buffers causes a NULL pointer dereference;
 * it calls __mark_inode_dirty(NULL) through __set_page_dirty().
 * To avoid this problem, the old style mark_buffer_dirty() is used instead.
 */
void nilfs_mark_buffer_dirty(struct buffer_head *bh)
{
        if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
                __set_page_dirty_nobuffers(bh->b_page);
}

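/*
 * nilfs_grab_buffer - get or create the buffer head covering a block
 *
 * Grabs (or creates) the page of @mapping that covers block @blkoff of
 * @inode and returns the corresponding buffer head, applying @b_state to
 * newly created buffers.  The page is returned locked.  If @mapping is a
 * shadow cache and the buffer is not uptodate, the buffer contents and
 * dirty state are copied in from the original cache pointed to by
 * assoc_mapping.  Returns NULL if the page could not be obtained.
 */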
struct buffer_head *nilfs_grab_buffer(struct inode *inode,
                                      struct address_space *mapping,
                                      unsigned long blkoff,
                                      unsigned long b_state)
{
        int blkbits = inode->i_blkbits;
        pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits);
        struct page *page, *opage;
        struct buffer_head *bh, *obh;

        page = grab_cache_page(mapping, index);
        if (unlikely(!page))
                return NULL;

        bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state);
        if (unlikely(!bh)) {
                unlock_page(page);
                page_cache_release(page);
                return NULL;
        }
        if (!buffer_uptodate(bh) && mapping->assoc_mapping != NULL) {
                /*
                 * A shadow page cache uses assoc_mapping to point to its
                 * original page cache.  The following code falls back to
                 * the original cache when the given cache is a shadow and
                 * the lookup did not find an up-to-date buffer.
                 */
                opage = find_lock_page(mapping->assoc_mapping, index);
                if (!opage)
                        return bh;

                obh = __nilfs_get_page_block(opage, blkoff, index, blkbits,
                                             b_state);
                if (buffer_uptodate(obh)) {
                        nilfs_copy_buffer(bh, obh);
                        if (buffer_dirty(obh)) {
                                nilfs_mark_buffer_dirty(bh);
                                if (!buffer_nilfs_node(bh) && NILFS_MDT(inode))
                                        nilfs_mdt_mark_dirty(inode);
                        }
                }
                brelse(obh);
                unlock_page(opage);
                page_cache_release(opage);
        }
        return bh;
}

/**
 * nilfs_forget_buffer - discard dirty state
 * @bh: buffer head of the buffer to be discarded
 */
void nilfs_forget_buffer(struct buffer_head *bh)
{
        struct page *page = bh->b_page;

        lock_buffer(bh);
        clear_buffer_nilfs_volatile(bh);
        clear_buffer_dirty(bh);
        if (nilfs_page_buffers_clean(page))
                __nilfs_clear_page_dirty(page);

        clear_buffer_uptodate(bh);
        clear_buffer_mapped(bh);
        bh->b_blocknr = -1;
        ClearPageUptodate(page);
        ClearPageMappedToDisk(page);
        unlock_buffer(bh);
        brelse(bh);
}

/**
 * nilfs_copy_buffer -- copy buffer data and flags
 * @dbh: destination buffer
 * @sbh: source buffer
 */
void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
{
        void *kaddr0, *kaddr1;
        unsigned long bits;
        struct page *spage = sbh->b_page, *dpage = dbh->b_page;
        struct buffer_head *bh;

        kaddr0 = kmap_atomic(spage, KM_USER0);
        kaddr1 = kmap_atomic(dpage, KM_USER1);
        memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
        kunmap_atomic(kaddr1, KM_USER1);
        kunmap_atomic(kaddr0, KM_USER0);

        dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
        dbh->b_blocknr = sbh->b_blocknr;
        dbh->b_bdev = sbh->b_bdev;

        bh = dbh;
        bits = sbh->b_state & ((1UL << BH_Uptodate) | (1UL << BH_Mapped));
        while ((bh = bh->b_this_page) != dbh) {
                lock_buffer(bh);
                bits &= bh->b_state;
                unlock_buffer(bh);
        }
        if (bits & (1UL << BH_Uptodate))
                SetPageUptodate(dpage);
        else
                ClearPageUptodate(dpage);
        if (bits & (1UL << BH_Mapped))
                SetPageMappedToDisk(dpage);
        else
                ClearPageMappedToDisk(dpage);
}

/**
 * nilfs_page_buffers_clean - check if a page has dirty buffers or not.
 * @page: page to be checked
 *
 * nilfs_page_buffers_clean() returns zero if the page has dirty buffers.
 * Otherwise, it returns a non-zero value.
 */
int nilfs_page_buffers_clean(struct page *page)
{
        struct buffer_head *bh, *head;

        bh = head = page_buffers(page);
        do {
                if (buffer_dirty(bh))
                        return 0;
                bh = bh->b_this_page;
        } while (bh != head);
        return 1;
}

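/*
 * nilfs_page_bug - dump diagnostic information on a broken page
 *
 * Prints the reference count, index, flags, mapping and inode number of
 * @page to the kernel log, followed by the state of each of its buffer
 * heads.
 */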
void nilfs_page_bug(struct page *page)
{
        struct address_space *m;
        unsigned long ino = 0;

        if (unlikely(!page)) {
                printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
                return;
        }

        m = page->mapping;
        if (m) {
                struct inode *inode = NILFS_AS_I(m);
                if (inode != NULL)
                        ino = inode->i_ino;
        }
        printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
               "mapping=%p ino=%lu\n",
               page, atomic_read(&page->_count),
               (unsigned long long)page->index, page->flags, m, ino);

        if (page_has_buffers(page)) {
                struct buffer_head *bh, *head;
                int i = 0;

                bh = head = page_buffers(page);
                do {
                        printk(KERN_CRIT
                               " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n",
                               i++, bh, atomic_read(&bh->b_count),
                               (unsigned long long)bh->b_blocknr, bh->b_state);
                        bh = bh->b_this_page;
                } while (bh != head);
        }
}

/**
 * nilfs_alloc_private_page - allocate a private page with buffer heads
 * @bdev: block device the buffer heads are associated with
 * @size: block size of the buffer heads, in bytes
 * @state: initial state bits set on each buffer head
 *
 * Return Value: On success, a pointer to the allocated page is returned.
 * On error, NULL is returned.
 */
struct page *nilfs_alloc_private_page(struct block_device *bdev, int size,
                                      unsigned long state)
{
        struct buffer_head *bh, *head, *tail;
        struct page *page;

        page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */
        if (unlikely(!page))
                return NULL;

        lock_page(page);
        head = alloc_page_buffers(page, size, 0);
        if (unlikely(!head)) {
                unlock_page(page);
                __free_page(page);
                return NULL;
        }

        bh = head;
        do {
                bh->b_state = (1UL << BH_NILFS_Allocated) | state;
                tail = bh;
                bh->b_bdev = bdev;
                bh = bh->b_this_page;
        } while (bh);

        tail->b_this_page = head;
        attach_page_buffers(page, head);

        return page;
}

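/*
 * nilfs_free_private_page - release a page allocated by
 * nilfs_alloc_private_page()
 *
 * Frees the buffer heads attached to @page and then frees the page
 * itself.  The page must be locked and must not belong to any address
 * space.
 */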
void nilfs_free_private_page(struct page *page)
{
        BUG_ON(!PageLocked(page));
        BUG_ON(page->mapping);

        if (page_has_buffers(page) && !try_to_free_buffers(page))
                NILFS_PAGE_BUG(page, "failed to free page");

        unlock_page(page);
        __free_page(page);
}

/**
 * nilfs_copy_page -- copy the page with buffers
 * @dst: destination page
 * @src: source page
 * @copy_dirty: flag whether to copy dirty states on the page's buffer heads.
 *
 * This function is for both data pages and btnode pages.  The dirty flag
 * must be handled by the caller.  The page must not be under I/O.
 * Both src and dst pages must be locked.
 */
static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
{
        struct buffer_head *dbh, *dbufs, *sbh, *sbufs;
        unsigned long mask = NILFS_BUFFER_INHERENT_BITS;

        BUG_ON(PageWriteback(dst));

        sbh = sbufs = page_buffers(src);
        if (!page_has_buffers(dst))
                create_empty_buffers(dst, sbh->b_size, 0);

        if (copy_dirty)
                mask |= (1UL << BH_Dirty);

        dbh = dbufs = page_buffers(dst);
        do {
                lock_buffer(sbh);
                lock_buffer(dbh);
                dbh->b_state = sbh->b_state & mask;
                dbh->b_blocknr = sbh->b_blocknr;
                dbh->b_bdev = sbh->b_bdev;
                sbh = sbh->b_this_page;
                dbh = dbh->b_this_page;
        } while (dbh != dbufs);

        copy_highpage(dst, src);

        if (PageUptodate(src) && !PageUptodate(dst))
                SetPageUptodate(dst);
        else if (!PageUptodate(src) && PageUptodate(dst))
                ClearPageUptodate(dst);
        if (PageMappedToDisk(src) && !PageMappedToDisk(dst))
                SetPageMappedToDisk(dst);
        else if (!PageMappedToDisk(src) && PageMappedToDisk(dst))
                ClearPageMappedToDisk(dst);

        do {
                unlock_buffer(sbh);
                unlock_buffer(dbh);
                sbh = sbh->b_this_page;
                dbh = dbh->b_this_page;
        } while (dbh != dbufs);
}

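/*
 * nilfs_copy_dirty_pages - copy dirty pages between two page caches
 *
 * Copies every dirty page of the @smap page cache, together with its
 * buffers, into the @dmap page cache and marks the destination pages
 * dirty.  Returns 0 on success or -ENOMEM if a destination page could
 * not be allocated.
 */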
int nilfs_copy_dirty_pages(struct address_space *dmap,
                           struct address_space *smap)
{
        struct pagevec pvec;
        unsigned int i;
        pgoff_t index = 0;
        int err = 0;

        pagevec_init(&pvec, 0);
repeat:
        if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY,
                                PAGEVEC_SIZE))
                return 0;

        for (i = 0; i < pagevec_count(&pvec); i++) {
                struct page *page = pvec.pages[i], *dpage;

                lock_page(page);
                if (unlikely(!PageDirty(page)))
                        NILFS_PAGE_BUG(page, "inconsistent dirty state");

                dpage = grab_cache_page(dmap, page->index);
                if (unlikely(!dpage)) {
                        /* No empty page is added to the page cache */
                        err = -ENOMEM;
                        unlock_page(page);
                        break;
                }
                if (unlikely(!page_has_buffers(page)))
                        NILFS_PAGE_BUG(page,
                                       "found empty page in dat page cache");

                nilfs_copy_page(dpage, page, 1);
                __set_page_dirty_nobuffers(dpage);

                unlock_page(dpage);
                page_cache_release(dpage);
                unlock_page(page);
        }
        pagevec_release(&pvec);
        cond_resched();

        if (likely(!err))
                goto repeat;
        return err;
}

/**
 * nilfs_copy_back_pages -- copy back pages to original cache from shadow cache
 * @dmap: destination page cache
 * @smap: source page cache
 *
 * No pages must be added to the cache during this process.
 * This must be ensured by the caller.
 */
void nilfs_copy_back_pages(struct address_space *dmap,
                           struct address_space *smap)
{
        struct pagevec pvec;
        unsigned int i, n;
        pgoff_t index = 0;
        int err;

        pagevec_init(&pvec, 0);
repeat:
        n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE);
        if (!n)
                return;
        index = pvec.pages[n - 1]->index + 1;

        for (i = 0; i < pagevec_count(&pvec); i++) {
                struct page *page = pvec.pages[i], *dpage;
                pgoff_t offset = page->index;

                lock_page(page);
                dpage = find_lock_page(dmap, offset);
                if (dpage) {
                        /* override existing page on the destination cache */
                        WARN_ON(PageDirty(dpage));
                        nilfs_copy_page(dpage, page, 0);
                        unlock_page(dpage);
                        page_cache_release(dpage);
                } else {
                        struct page *page2;

                        /* move the page to the destination cache */
                        spin_lock_irq(&smap->tree_lock);
                        page2 = radix_tree_delete(&smap->page_tree, offset);
                        WARN_ON(page2 != page);

                        smap->nrpages--;
                        spin_unlock_irq(&smap->tree_lock);

                        spin_lock_irq(&dmap->tree_lock);
                        err = radix_tree_insert(&dmap->page_tree, offset, page);
                        if (unlikely(err < 0)) {
                                WARN_ON(err == -EEXIST);
                                page->mapping = NULL;
                                page_cache_release(page); /* for cache */
                        } else {
                                page->mapping = dmap;
                                dmap->nrpages++;
                                if (PageDirty(page))
                                        radix_tree_tag_set(&dmap->page_tree,
                                                           offset,
                                                           PAGECACHE_TAG_DIRTY);
                        }
                        spin_unlock_irq(&dmap->tree_lock);
                }
                unlock_page(page);
        }
        pagevec_release(&pvec);
        cond_resched();

        goto repeat;
}

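/*
 * nilfs_clear_dirty_pages - discard the dirty state of all pages in a cache
 *
 * Walks the dirty pages of @mapping and clears their uptodate,
 * mapped-to-disk and dirty flags as well as the dirty, volatile,
 * uptodate and mapped state of their buffers.
 */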
void nilfs_clear_dirty_pages(struct address_space *mapping)
{
        struct pagevec pvec;
        unsigned int i;
        pgoff_t index = 0;

        pagevec_init(&pvec, 0);

        while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
                                  PAGEVEC_SIZE)) {
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];
                        struct buffer_head *bh, *head;

                        lock_page(page);
                        ClearPageUptodate(page);
                        ClearPageMappedToDisk(page);
                        bh = head = page_buffers(page);
                        do {
                                lock_buffer(bh);
                                clear_buffer_dirty(bh);
                                clear_buffer_nilfs_volatile(bh);
                                clear_buffer_uptodate(bh);
                                clear_buffer_mapped(bh);
                                unlock_buffer(bh);
                                bh = bh->b_this_page;
                        } while (bh != head);

                        __nilfs_clear_page_dirty(page);
                        unlock_page(page);
                }
                pagevec_release(&pvec);
                cond_resched();
        }
}

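/*
 * nilfs_page_count_clean_buffers - count buffers without the dirty flag
 *
 * Returns the number of buffers of @page that overlap the byte range
 * [@from, @to) and are not marked dirty.
 */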
unsigned nilfs_page_count_clean_buffers(struct page *page,
                                        unsigned from, unsigned to)
{
        unsigned block_start, block_end;
        struct buffer_head *bh, *head;
        unsigned nc = 0;

        for (bh = head = page_buffers(page), block_start = 0;
             bh != head || !block_start;
             block_start = block_end, bh = bh->b_this_page) {
                block_end = block_start + bh->b_size;
                if (block_end > from && block_start < to && !buffer_dirty(bh))
                        nc++;
        }
        return nc;
}

/*
 * NILFS2 needs clear_page_dirty() in the following two cases:
 *
 * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears
 *    page dirty flags when it copies back pages from the shadow cache
 *    (gcdat->{i_mapping,i_btnode_cache}) to its original cache
 *    (dat->{i_mapping,i_btnode_cache}).
 *
 * 2) Some B-tree operations like insertion or deletion may dispose of
 *    buffers in a dirty state; the dirty state of their pages then needs
 *    to be cancelled.
 */
int __nilfs_clear_page_dirty(struct page *page)
{
        struct address_space *mapping = page->mapping;

        if (mapping) {
                spin_lock_irq(&mapping->tree_lock);
                if (test_bit(PG_dirty, &page->flags)) {
                        radix_tree_tag_clear(&mapping->page_tree,
                                             page_index(page),
                                             PAGECACHE_TAG_DIRTY);
                        spin_unlock_irq(&mapping->tree_lock);
                        return clear_page_dirty_for_io(page);
                }
                spin_unlock_irq(&mapping->tree_lock);
                return 0;
        }
        return TestClearPageDirty(page);
}