/*
 * page.c - buffer/page management specific to NILFS
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * Written by Ryusuke Konishi and Seiji Kihara.
 */

#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/bitops.h>
#include <linux/page-flags.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/pagevec.h>
#include <linux/gfp.h>
#include "nilfs.h"
#include "page.h"
#include "mdt.h"

#define NILFS_BUFFER_INHERENT_BITS                                      \
        (BIT(BH_Uptodate) | BIT(BH_Mapped) | BIT(BH_NILFS_Node) |       \
         BIT(BH_NILFS_Volatile) | BIT(BH_NILFS_Checked))

static struct buffer_head *
__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
                       int blkbits, unsigned long b_state)
{
        unsigned long first_block;
        struct buffer_head *bh;

        if (!page_has_buffers(page))
                create_empty_buffers(page, 1 << blkbits, b_state);

        first_block = (unsigned long)index << (PAGE_SHIFT - blkbits);
        bh = nilfs_page_get_nth_block(page, block - first_block);

        touch_buffer(bh);
        wait_on_buffer(bh);
        return bh;
}

struct buffer_head *nilfs_grab_buffer(struct inode *inode,
                                      struct address_space *mapping,
                                      unsigned long blkoff,
                                      unsigned long b_state)
{
        int blkbits = inode->i_blkbits;
        pgoff_t index = blkoff >> (PAGE_SHIFT - blkbits);
        struct page *page;
        struct buffer_head *bh;

        page = grab_cache_page(mapping, index);
        if (unlikely(!page))
                return NULL;

        bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state);
        if (unlikely(!bh)) {
                unlock_page(page);
                put_page(page);
                return NULL;
        }
        return bh;
}
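
/*
 * Illustrative caller sketch (not part of this file): nilfs_grab_buffer()
 * returns the buffer with its page locked and an extra reference held, so
 * a typical metadata read path pairs it with unlock_page()/put_page() on
 * bh->b_page plus brelse().  example_read_block() is a hypothetical I/O
 * helper, not a real function.
 *
 *      struct buffer_head *bh;
 *
 *      bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0);
 *      if (unlikely(!bh))
 *              return -ENOMEM;
 *      if (!buffer_uptodate(bh))
 *              err = example_read_block(bh);   // hypothetical helper
 *      ...
 *      unlock_page(bh->b_page);
 *      put_page(bh->b_page);
 *      brelse(bh);
 */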

/**
 * nilfs_forget_buffer - discard dirty state
 * @bh: buffer head of the buffer to be discarded
 */
void nilfs_forget_buffer(struct buffer_head *bh)
{
        struct page *page = bh->b_page;
        const unsigned long clear_bits =
                (BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) |
                 BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) |
                 BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected));

        lock_buffer(bh);
        set_mask_bits(&bh->b_state, clear_bits, 0);
        if (nilfs_page_buffers_clean(page))
                __nilfs_clear_page_dirty(page);

        bh->b_blocknr = -1;
        ClearPageUptodate(page);
        ClearPageMappedToDisk(page);
        unlock_buffer(bh);
        brelse(bh);
}

/**
 * nilfs_copy_buffer -- copy buffer data and flags
 * @dbh: destination buffer
 * @sbh: source buffer
 */
void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
{
        void *kaddr0, *kaddr1;
        unsigned long bits;
        struct page *spage = sbh->b_page, *dpage = dbh->b_page;
        struct buffer_head *bh;

        kaddr0 = kmap_atomic(spage);
        kaddr1 = kmap_atomic(dpage);
        memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
        kunmap_atomic(kaddr1);
        kunmap_atomic(kaddr0);

        dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
        dbh->b_blocknr = sbh->b_blocknr;
        dbh->b_bdev = sbh->b_bdev;

        bh = dbh;
        bits = sbh->b_state & (BIT(BH_Uptodate) | BIT(BH_Mapped));
        while ((bh = bh->b_this_page) != dbh) {
                lock_buffer(bh);
                bits &= bh->b_state;
                unlock_buffer(bh);
        }
        if (bits & BIT(BH_Uptodate))
                SetPageUptodate(dpage);
        else
                ClearPageUptodate(dpage);
        if (bits & BIT(BH_Mapped))
                SetPageMappedToDisk(dpage);
        else
                ClearPageMappedToDisk(dpage);
}
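
/*
 * Illustrative use (a sketch, not taken from this file): copying an
 * existing block image into a buffer grabbed at a different offset of the
 * same inode.  The copy inherits the uptodate/mapped/node flags via
 * NILFS_BUFFER_INHERENT_BITS; dirtying the result is left to the caller.
 *
 *      struct buffer_head *nbh;
 *
 *      nbh = nilfs_grab_buffer(inode, inode->i_mapping, new_blkoff, 0);
 *      if (unlikely(!nbh))
 *              return -ENOMEM;
 *      nilfs_copy_buffer(nbh, obh);    // obh: source buffer held by caller
 *      mark_buffer_dirty(nbh);
 *      ...
 */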

/**
 * nilfs_page_buffers_clean - check if a page has dirty buffers or not.
 * @page: page to be checked
 *
 * nilfs_page_buffers_clean() returns zero if the page has dirty buffers.
 * Otherwise, it returns a non-zero value.
 */
int nilfs_page_buffers_clean(struct page *page)
{
        struct buffer_head *bh, *head;

        bh = head = page_buffers(page);
        do {
                if (buffer_dirty(bh))
                        return 0;
                bh = bh->b_this_page;
        } while (bh != head);
        return 1;
}
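
/*
 * As used above in nilfs_forget_buffer(): after cleaning one buffer, the
 * page-level dirty flag may only be dropped when every buffer on the page
 * is clean, e.g.
 *
 *      if (nilfs_page_buffers_clean(page))
 *              __nilfs_clear_page_dirty(page);
 */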

void nilfs_page_bug(struct page *page)
{
        struct address_space *m;
        unsigned long ino;

        if (unlikely(!page)) {
                printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
                return;
        }

        m = page->mapping;
        ino = m ? m->host->i_ino : 0;

        printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
               "mapping=%p ino=%lu\n",
               page, page_ref_count(page),
               (unsigned long long)page->index, page->flags, m, ino);

        if (page_has_buffers(page)) {
                struct buffer_head *bh, *head;
                int i = 0;

                bh = head = page_buffers(page);
                do {
                        printk(KERN_CRIT
                               " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n",
                               i++, bh, atomic_read(&bh->b_count),
                               (unsigned long long)bh->b_blocknr, bh->b_state);
                        bh = bh->b_this_page;
                } while (bh != head);
        }
}

/**
 * nilfs_copy_page -- copy the page with buffers
 * @dst: destination page
 * @src: source page
 * @copy_dirty: flag whether to copy dirty states on the page's buffer heads.
 *
 * This function is for both data pages and btnode pages.  The dirty flag
 * must be handled by the caller.  The page must not be under I/O.
 * Both src and dst pages must be locked.
 */
static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
{
        struct buffer_head *dbh, *dbufs, *sbh, *sbufs;
        unsigned long mask = NILFS_BUFFER_INHERENT_BITS;

        BUG_ON(PageWriteback(dst));

        sbh = sbufs = page_buffers(src);
        if (!page_has_buffers(dst))
                create_empty_buffers(dst, sbh->b_size, 0);

        if (copy_dirty)
                mask |= BIT(BH_Dirty);

        dbh = dbufs = page_buffers(dst);
        do {
                lock_buffer(sbh);
                lock_buffer(dbh);
                dbh->b_state = sbh->b_state & mask;
                dbh->b_blocknr = sbh->b_blocknr;
                dbh->b_bdev = sbh->b_bdev;
                sbh = sbh->b_this_page;
                dbh = dbh->b_this_page;
        } while (dbh != dbufs);

        copy_highpage(dst, src);

        if (PageUptodate(src) && !PageUptodate(dst))
                SetPageUptodate(dst);
        else if (!PageUptodate(src) && PageUptodate(dst))
                ClearPageUptodate(dst);
        if (PageMappedToDisk(src) && !PageMappedToDisk(dst))
                SetPageMappedToDisk(dst);
        else if (!PageMappedToDisk(src) && PageMappedToDisk(dst))
                ClearPageMappedToDisk(dst);

        do {
                unlock_buffer(sbh);
                unlock_buffer(dbh);
                sbh = sbh->b_this_page;
                dbh = dbh->b_this_page;
        } while (dbh != dbufs);
}
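
/*
 * Calling pattern (a sketch of what the two copy routines below do): both
 * pages are locked before the copy, and the page-level dirty flag is set
 * or left alone by the caller depending on the direction of the copy.
 *
 *      lock_page(src);
 *      dst = grab_cache_page(dst_mapping, src->index); // returns dst locked
 *      nilfs_copy_page(dst, src, 1);
 *      __set_page_dirty_nobuffers(dst);
 *      unlock_page(dst);
 *      put_page(dst);
 *      unlock_page(src);
 */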

int nilfs_copy_dirty_pages(struct address_space *dmap,
                           struct address_space *smap)
{
        struct pagevec pvec;
        unsigned int i;
        pgoff_t index = 0;
        int err = 0;

        pagevec_init(&pvec, 0);
repeat:
        if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY,
                                PAGEVEC_SIZE))
                return 0;

        for (i = 0; i < pagevec_count(&pvec); i++) {
                struct page *page = pvec.pages[i], *dpage;

                lock_page(page);
                if (unlikely(!PageDirty(page)))
                        NILFS_PAGE_BUG(page, "inconsistent dirty state");

                dpage = grab_cache_page(dmap, page->index);
                if (unlikely(!dpage)) {
                        /* No empty page is added to the page cache */
                        err = -ENOMEM;
                        unlock_page(page);
                        break;
                }
                if (unlikely(!page_has_buffers(page)))
                        NILFS_PAGE_BUG(page,
                                       "found empty page in dat page cache");

                nilfs_copy_page(dpage, page, 1);
                __set_page_dirty_nobuffers(dpage);

                unlock_page(dpage);
                put_page(dpage);
                unlock_page(page);
        }
        pagevec_release(&pvec);
        cond_resched();

        if (likely(!err))
                goto repeat;
        return err;
}

/**
 * nilfs_copy_back_pages -- copy back pages to original cache from shadow cache
 * @dmap: destination page cache
 * @smap: source page cache
 *
 * No pages must be added to the cache during this process.
 * This must be ensured by the caller.
 */
void nilfs_copy_back_pages(struct address_space *dmap,
                           struct address_space *smap)
{
        struct pagevec pvec;
        unsigned int i, n;
        pgoff_t index = 0;
        int err;

        pagevec_init(&pvec, 0);
repeat:
        n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE);
        if (!n)
                return;
        index = pvec.pages[n - 1]->index + 1;

        for (i = 0; i < pagevec_count(&pvec); i++) {
                struct page *page = pvec.pages[i], *dpage;
                pgoff_t offset = page->index;

                lock_page(page);
                dpage = find_lock_page(dmap, offset);
                if (dpage) {
                        /* override existing page on the destination cache */
                        WARN_ON(PageDirty(dpage));
                        nilfs_copy_page(dpage, page, 0);
                        unlock_page(dpage);
                        put_page(dpage);
                } else {
                        struct page *page2;

                        /* move the page to the destination cache */
                        spin_lock_irq(&smap->tree_lock);
                        page2 = radix_tree_delete(&smap->page_tree, offset);
                        WARN_ON(page2 != page);

                        smap->nrpages--;
                        spin_unlock_irq(&smap->tree_lock);

                        spin_lock_irq(&dmap->tree_lock);
                        err = radix_tree_insert(&dmap->page_tree, offset, page);
                        if (unlikely(err < 0)) {
                                WARN_ON(err == -EEXIST);
                                page->mapping = NULL;
                                put_page(page); /* for cache */
                        } else {
                                page->mapping = dmap;
                                dmap->nrpages++;
                                if (PageDirty(page))
                                        radix_tree_tag_set(&dmap->page_tree,
                                                           offset,
                                                           PAGECACHE_TAG_DIRTY);
                        }
                        spin_unlock_irq(&dmap->tree_lock);
                }
                unlock_page(page);
        }
        pagevec_release(&pvec);
        cond_resched();

        goto repeat;
}
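
/*
 * Sketch of how the pair above is typically used for a shadow page cache
 * (hypothetical names; the real callers live in other nilfs2 files):
 *
 *      // snapshot: mirror dirty pages of the original cache into a shadow
 *      err = nilfs_copy_dirty_pages(shadow_mapping, inode->i_mapping);
 *
 *      // rollback: restore the original cache from the shadow
 *      nilfs_clear_dirty_pages(inode->i_mapping, true);
 *      nilfs_copy_back_pages(inode->i_mapping, shadow_mapping);
 */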

/**
 * nilfs_clear_dirty_pages - discard dirty pages in address space
 * @mapping: address space with dirty pages for discarding
 * @silent: suppress [true] or print [false] warning messages
 */
void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent)
{
        struct pagevec pvec;
        unsigned int i;
        pgoff_t index = 0;

        pagevec_init(&pvec, 0);

        while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
                                  PAGEVEC_SIZE)) {
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];

                        lock_page(page);
                        nilfs_clear_dirty_page(page, silent);
                        unlock_page(page);
                }
                pagevec_release(&pvec);
                cond_resched();
        }
}
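
/*
 * Example (a sketch, not from this file): dropping the dirty state of a
 * metadata inode's caches after a failed log write, printing a warning for
 * each discarded page and block (silent = false).  The i_btnode_cache
 * field name is assumed from the nilfs2 headers of this era.
 *
 *      nilfs_clear_dirty_pages(inode->i_mapping, false);
 *      nilfs_clear_dirty_pages(&NILFS_I(inode)->i_btnode_cache, false);
 */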

/**
 * nilfs_clear_dirty_page - discard dirty page
 * @page: dirty page that will be discarded
 * @silent: suppress [true] or print [false] warning messages
 */
void nilfs_clear_dirty_page(struct page *page, bool silent)
{
        struct inode *inode = page->mapping->host;
        struct super_block *sb = inode->i_sb;

        BUG_ON(!PageLocked(page));

        if (!silent)
                nilfs_msg(sb, KERN_WARNING,
                          "discard dirty page: offset=%lld, ino=%lu",
                          page_offset(page), inode->i_ino);

        ClearPageUptodate(page);
        ClearPageMappedToDisk(page);

        if (page_has_buffers(page)) {
                struct buffer_head *bh, *head;
                const unsigned long clear_bits =
                        (BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) |
                         BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) |
                         BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected));

                bh = head = page_buffers(page);
                do {
                        lock_buffer(bh);
                        if (!silent)
                                nilfs_msg(sb, KERN_WARNING,
                                          "discard dirty block: blocknr=%llu, size=%zu",
                                          (u64)bh->b_blocknr, bh->b_size);

                        set_mask_bits(&bh->b_state, clear_bits, 0);
                        unlock_buffer(bh);
                } while (bh = bh->b_this_page, bh != head);
        }

        __nilfs_clear_page_dirty(page);
}

unsigned int nilfs_page_count_clean_buffers(struct page *page,
                                            unsigned int from, unsigned int to)
{
        unsigned int block_start, block_end;
        struct buffer_head *bh, *head;
        unsigned int nc = 0;

        for (bh = head = page_buffers(page), block_start = 0;
             bh != head || !block_start;
             block_start = block_end, bh = bh->b_this_page) {
                block_end = block_start + bh->b_size;
                if (block_end > from && block_start < to && !buffer_dirty(bh))
                        nc++;
        }
        return nc;
}

void nilfs_mapping_init(struct address_space *mapping, struct inode *inode)
{
        mapping->host = inode;
        mapping->flags = 0;
        mapping_set_gfp_mask(mapping, GFP_NOFS);
        mapping->private_data = NULL;
        mapping->a_ops = &empty_aops;
}
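
/*
 * Illustrative use (a sketch; the actual call sites are elsewhere in
 * nilfs2): setting up a private page cache, such as a per-inode B-tree
 * node cache, so that it is tied to the owning inode and uses GFP_NOFS
 * allocations.  The i_btnode_cache field name is an assumption here.
 *
 *      address_space_init_once(&ii->i_btnode_cache);
 *      nilfs_mapping_init(&ii->i_btnode_cache, inode);
 */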

/*
 * NILFS2 needs clear_page_dirty() in the following two cases:
 *
 * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears
 *    page dirty flags when it copies back pages from the shadow cache
 *    (gcdat->{i_mapping,i_btnode_cache}) to its original cache
 *    (dat->{i_mapping,i_btnode_cache}).
 *
 * 2) Some B-tree operations like insertion or deletion may dispose buffers
 *    in dirty state, and this needs to cancel the dirty state of their pages.
 */
int __nilfs_clear_page_dirty(struct page *page)
{
        struct address_space *mapping = page->mapping;

        if (mapping) {
                spin_lock_irq(&mapping->tree_lock);
                if (test_bit(PG_dirty, &page->flags)) {
                        radix_tree_tag_clear(&mapping->page_tree,
                                             page_index(page),
                                             PAGECACHE_TAG_DIRTY);
                        spin_unlock_irq(&mapping->tree_lock);
                        return clear_page_dirty_for_io(page);
                }
                spin_unlock_irq(&mapping->tree_lock);
                return 0;
        }
        return TestClearPageDirty(page);
}
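
/*
 * Case 2) above corresponds to what nilfs_forget_buffer() does earlier in
 * this file: when a B-tree operation disposes a dirty buffer, the buffer's
 * dirty bit is cleared first, and the page-level dirty state is cancelled
 * only if no other buffer on the page remains dirty:
 *
 *      clear_buffer_dirty(bh);
 *      if (nilfs_page_buffers_clean(page))
 *              __nilfs_clear_page_dirty(page);
 */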

/**
 * nilfs_find_uncommitted_extent - find extent of uncommitted data
 * @inode: inode
 * @start_blk: start block offset (in)
 * @blkoff: start offset of the found extent (out)
 *
 * This function searches an extent of buffers marked "delayed" which
 * starts from a block offset equal to or larger than @start_blk.  If
 * such an extent is found, this function stores the start offset in
 * @blkoff and returns its length in blocks.  Otherwise, zero is
 * returned.
 */
unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
                                            sector_t start_blk,
                                            sector_t *blkoff)
{
        unsigned int i;
        pgoff_t index;
        unsigned int nblocks_in_page;
        unsigned long length = 0;
        sector_t b;
        struct pagevec pvec;
        struct page *page;

        if (inode->i_mapping->nrpages == 0)
                return 0;

        index = start_blk >> (PAGE_SHIFT - inode->i_blkbits);
        nblocks_in_page = 1U << (PAGE_SHIFT - inode->i_blkbits);

        pagevec_init(&pvec, 0);

repeat:
        pvec.nr = find_get_pages_contig(inode->i_mapping, index, PAGEVEC_SIZE,
                                        pvec.pages);
        if (pvec.nr == 0)
                return length;

        if (length > 0 && pvec.pages[0]->index > index)
                goto out;

        b = pvec.pages[0]->index << (PAGE_SHIFT - inode->i_blkbits);
        i = 0;
        do {
                page = pvec.pages[i];

                lock_page(page);
                if (page_has_buffers(page)) {
                        struct buffer_head *bh, *head;

                        bh = head = page_buffers(page);
                        do {
                                if (b < start_blk)
                                        continue;
                                if (buffer_delay(bh)) {
                                        if (length == 0)
                                                *blkoff = b;
                                        length++;
                                } else if (length > 0) {
                                        goto out_locked;
                                }
                        } while (++b, bh = bh->b_this_page, bh != head);
                } else {
                        if (length > 0)
                                goto out_locked;

                        b += nblocks_in_page;
                }
                unlock_page(page);

        } while (++i < pagevec_count(&pvec));

        index = page->index + 1;
        pagevec_release(&pvec);
        cond_resched();
        goto repeat;

out_locked:
        unlock_page(page);
out:
        pagevec_release(&pvec);
        return length;
}
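
/*
 * Caller sketch (illustrative; nilfs2 reports delayed-allocation extents
 * this way from its fiemap path).  @blkoff receives the first block of the
 * extent, and the return value is its length in blocks:
 *
 *      sector_t delalloc_blkoff;
 *      unsigned long delalloc_blklen;
 *
 *      delalloc_blklen = nilfs_find_uncommitted_extent(inode, blkoff,
 *                                                      &delalloc_blkoff);
 *      if (delalloc_blklen > 0 && delalloc_blkoff == blkoff) {
 *              // blocks [blkoff, blkoff + delalloc_blklen) are dirty in
 *              // the page cache but have no disk blocks assigned yet
 *      }
 */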