linux/fs/nilfs2/page.c
/*
 * page.c - buffer/page management specific to NILFS
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * Written by Ryusuke Konishi and Seiji Kihara.
 */

#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/bitops.h>
#include <linux/page-flags.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/pagevec.h>
#include <linux/gfp.h>
#include "nilfs.h"
#include "page.h"
#include "mdt.h"

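/* Buffer state bits that are carried over to the destination when a buffer is copied */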
#define NILFS_BUFFER_INHERENT_BITS                                      \
        (BIT(BH_Uptodate) | BIT(BH_Mapped) | BIT(BH_NILFS_Node) |       \
         BIT(BH_NILFS_Volatile) | BIT(BH_NILFS_Checked))

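/*
 * Return the buffer head for block @block within @page, creating empty
 * buffers on the page first if it has none.  The buffer is marked
 * accessed and any pending I/O on it is waited for before it is returned.
 */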
static struct buffer_head *
__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
                       int blkbits, unsigned long b_state)
{
        unsigned long first_block;
        struct buffer_head *bh;

        if (!page_has_buffers(page))
                create_empty_buffers(page, 1 << blkbits, b_state);

        first_block = (unsigned long)index << (PAGE_SHIFT - blkbits);
        bh = nilfs_page_get_nth_block(page, block - first_block);

        touch_buffer(bh);
        wait_on_buffer(bh);
        return bh;
}

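/**
 * nilfs_grab_buffer - get the buffer head covering a block in a page cache
 * @inode: inode whose block size is used
 * @mapping: page cache to look up or insert the page into
 * @blkoff: block offset
 * @b_state: initial buffer state bits used when buffers are created
 *
 * Finds or creates the page covering @blkoff in @mapping and returns the
 * buffer head for that block.  On success the underlying page remains
 * locked; unlocking it and releasing the buffer head are left to the
 * caller.  Returns NULL if the page could not be obtained.
 */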
struct buffer_head *nilfs_grab_buffer(struct inode *inode,
                                      struct address_space *mapping,
                                      unsigned long blkoff,
                                      unsigned long b_state)
{
        int blkbits = inode->i_blkbits;
        pgoff_t index = blkoff >> (PAGE_SHIFT - blkbits);
        struct page *page;
        struct buffer_head *bh;

        page = grab_cache_page(mapping, index);
        if (unlikely(!page))
                return NULL;

        bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state);
        if (unlikely(!bh)) {
                unlock_page(page);
                put_page(page);
                return NULL;
        }
        return bh;
}

/**
 * nilfs_forget_buffer - discard dirty state
 * @bh: buffer head of the buffer to be discarded
 */
void nilfs_forget_buffer(struct buffer_head *bh)
{
        struct page *page = bh->b_page;
        const unsigned long clear_bits =
                (BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) |
                 BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) |
                 BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected));

        lock_buffer(bh);
        set_mask_bits(&bh->b_state, clear_bits, 0);
        if (nilfs_page_buffers_clean(page))
                __nilfs_clear_page_dirty(page);

        bh->b_blocknr = -1;
        ClearPageUptodate(page);
        ClearPageMappedToDisk(page);
        unlock_buffer(bh);
        brelse(bh);
}

/**
 * nilfs_copy_buffer -- copy buffer data and flags
 * @dbh: destination buffer
 * @sbh: source buffer
 */
void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
{
        void *kaddr0, *kaddr1;
        unsigned long bits;
        struct page *spage = sbh->b_page, *dpage = dbh->b_page;
        struct buffer_head *bh;

        kaddr0 = kmap_atomic(spage);
        kaddr1 = kmap_atomic(dpage);
        memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
        kunmap_atomic(kaddr1);
        kunmap_atomic(kaddr0);

        dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
        dbh->b_blocknr = sbh->b_blocknr;
        dbh->b_bdev = sbh->b_bdev;

        bh = dbh;
        bits = sbh->b_state & (BIT(BH_Uptodate) | BIT(BH_Mapped));
        while ((bh = bh->b_this_page) != dbh) {
                lock_buffer(bh);
                bits &= bh->b_state;
                unlock_buffer(bh);
        }
        if (bits & BIT(BH_Uptodate))
                SetPageUptodate(dpage);
        else
                ClearPageUptodate(dpage);
        if (bits & BIT(BH_Mapped))
                SetPageMappedToDisk(dpage);
        else
                ClearPageMappedToDisk(dpage);
}

/**
 * nilfs_page_buffers_clean - check if a page has dirty buffers or not.
 * @page: page to be checked
 *
 * nilfs_page_buffers_clean() returns zero if the page has dirty buffers.
 * Otherwise, it returns a non-zero value.
 */
int nilfs_page_buffers_clean(struct page *page)
{
        struct buffer_head *bh, *head;

        bh = head = page_buffers(page);
        do {
                if (buffer_dirty(bh))
                        return 0;
                bh = bh->b_this_page;
        } while (bh != head);
        return 1;
}

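/*
 * nilfs_page_bug - dump the state of a broken page for debugging
 * @page: page to report on (may be NULL)
 *
 * Prints the reference count, index, flags and owner inode number of
 * @page, followed by the state of each buffer head attached to it, to
 * the kernel log at KERN_CRIT level.
 */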
void nilfs_page_bug(struct page *page)
{
        struct address_space *m;
        unsigned long ino;

        if (unlikely(!page)) {
                printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
                return;
        }

        m = page->mapping;
        ino = m ? m->host->i_ino : 0;

        printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
               "mapping=%p ino=%lu\n",
               page, page_ref_count(page),
               (unsigned long long)page->index, page->flags, m, ino);

        if (page_has_buffers(page)) {
                struct buffer_head *bh, *head;
                int i = 0;

                bh = head = page_buffers(page);
                do {
                        printk(KERN_CRIT
                               " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n",
                               i++, bh, atomic_read(&bh->b_count),
                               (unsigned long long)bh->b_blocknr, bh->b_state);
                        bh = bh->b_this_page;
                } while (bh != head);
        }
}

/**
 * nilfs_copy_page -- copy the page with buffers
 * @dst: destination page
 * @src: source page
 * @copy_dirty: flag whether to copy dirty states on the page's buffer heads.
 *
 * This function is for both data pages and btnode pages.  The dirty flag
 * must be handled by the caller.  The pages must not be under I/O.
 * Both src and dst must be locked.
 */
static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
{
        struct buffer_head *dbh, *dbufs, *sbh, *sbufs;
        unsigned long mask = NILFS_BUFFER_INHERENT_BITS;

        BUG_ON(PageWriteback(dst));

        sbh = sbufs = page_buffers(src);
        if (!page_has_buffers(dst))
                create_empty_buffers(dst, sbh->b_size, 0);

        if (copy_dirty)
                mask |= BIT(BH_Dirty);

        dbh = dbufs = page_buffers(dst);
        do {
                lock_buffer(sbh);
                lock_buffer(dbh);
                dbh->b_state = sbh->b_state & mask;
                dbh->b_blocknr = sbh->b_blocknr;
                dbh->b_bdev = sbh->b_bdev;
                sbh = sbh->b_this_page;
                dbh = dbh->b_this_page;
        } while (dbh != dbufs);

        copy_highpage(dst, src);

        if (PageUptodate(src) && !PageUptodate(dst))
                SetPageUptodate(dst);
        else if (!PageUptodate(src) && PageUptodate(dst))
                ClearPageUptodate(dst);
        if (PageMappedToDisk(src) && !PageMappedToDisk(dst))
                SetPageMappedToDisk(dst);
        else if (!PageMappedToDisk(src) && PageMappedToDisk(dst))
                ClearPageMappedToDisk(dst);

        do {
                unlock_buffer(sbh);
                unlock_buffer(dbh);
                sbh = sbh->b_this_page;
                dbh = dbh->b_this_page;
        } while (dbh != dbufs);
}

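/**
 * nilfs_copy_dirty_pages - copy dirty pages from one page cache to another
 * @dmap: destination page cache
 * @smap: source page cache
 *
 * Copies every page tagged dirty in @smap, together with its buffers, to
 * the page with the same index in @dmap and marks the copy dirty.  It
 * returns zero on success, or -ENOMEM if a destination page could not be
 * allocated.
 */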
int nilfs_copy_dirty_pages(struct address_space *dmap,
                           struct address_space *smap)
{
        struct pagevec pvec;
        unsigned int i;
        pgoff_t index = 0;
        int err = 0;

        pagevec_init(&pvec, 0);
repeat:
        if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY,
                                PAGEVEC_SIZE))
                return 0;

        for (i = 0; i < pagevec_count(&pvec); i++) {
                struct page *page = pvec.pages[i], *dpage;

                lock_page(page);
                if (unlikely(!PageDirty(page)))
                        NILFS_PAGE_BUG(page, "inconsistent dirty state");

                dpage = grab_cache_page(dmap, page->index);
                if (unlikely(!dpage)) {
                        /* No empty page is added to the page cache */
                        err = -ENOMEM;
                        unlock_page(page);
                        break;
                }
                if (unlikely(!page_has_buffers(page)))
                        NILFS_PAGE_BUG(page,
                                       "found empty page in dat page cache");

                nilfs_copy_page(dpage, page, 1);
                __set_page_dirty_nobuffers(dpage);

                unlock_page(dpage);
                put_page(dpage);
                unlock_page(page);
        }
        pagevec_release(&pvec);
        cond_resched();

        if (likely(!err))
                goto repeat;
        return err;
}

/**
 * nilfs_copy_back_pages -- copy back pages to original cache from shadow cache
 * @dmap: destination page cache
 * @smap: source page cache
 *
 * No pages must be added to the cache during this process.
 * This must be ensured by the caller.
 */
void nilfs_copy_back_pages(struct address_space *dmap,
                           struct address_space *smap)
{
        struct pagevec pvec;
        unsigned int i, n;
        pgoff_t index = 0;
        int err;

        pagevec_init(&pvec, 0);
repeat:
        n = pagevec_lookup(&pvec, smap, &index);
        if (!n)
                return;

        for (i = 0; i < pagevec_count(&pvec); i++) {
                struct page *page = pvec.pages[i], *dpage;
                pgoff_t offset = page->index;

                lock_page(page);
                dpage = find_lock_page(dmap, offset);
                if (dpage) {
                        /* override existing page on the destination cache */
                        WARN_ON(PageDirty(dpage));
                        nilfs_copy_page(dpage, page, 0);
                        unlock_page(dpage);
                        put_page(dpage);
                } else {
                        struct page *page2;

                        /* move the page to the destination cache */
                        spin_lock_irq(&smap->tree_lock);
                        page2 = radix_tree_delete(&smap->page_tree, offset);
                        WARN_ON(page2 != page);

                        smap->nrpages--;
                        spin_unlock_irq(&smap->tree_lock);

                        spin_lock_irq(&dmap->tree_lock);
                        err = radix_tree_insert(&dmap->page_tree, offset, page);
                        if (unlikely(err < 0)) {
                                WARN_ON(err == -EEXIST);
                                page->mapping = NULL;
                                put_page(page); /* for cache */
                        } else {
                                page->mapping = dmap;
                                dmap->nrpages++;
                                if (PageDirty(page))
                                        radix_tree_tag_set(&dmap->page_tree,
                                                           offset,
                                                           PAGECACHE_TAG_DIRTY);
                        }
                        spin_unlock_irq(&dmap->tree_lock);
                }
                unlock_page(page);
        }
        pagevec_release(&pvec);
        cond_resched();

        goto repeat;
}

/**
 * nilfs_clear_dirty_pages - discard dirty pages in address space
 * @mapping: address space with dirty pages for discarding
 * @silent: suppress [true] or print [false] warning messages
 */
void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent)
{
        struct pagevec pvec;
        unsigned int i;
        pgoff_t index = 0;

        pagevec_init(&pvec, 0);

        while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
                                  PAGEVEC_SIZE)) {
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];

                        lock_page(page);
                        nilfs_clear_dirty_page(page, silent);
                        unlock_page(page);
                }
                pagevec_release(&pvec);
                cond_resched();
        }
}

/**
 * nilfs_clear_dirty_page - discard dirty page
 * @page: dirty page that will be discarded
 * @silent: suppress [true] or print [false] warning messages
 */
void nilfs_clear_dirty_page(struct page *page, bool silent)
{
        struct inode *inode = page->mapping->host;
        struct super_block *sb = inode->i_sb;

        BUG_ON(!PageLocked(page));

        if (!silent)
                nilfs_msg(sb, KERN_WARNING,
                          "discard dirty page: offset=%lld, ino=%lu",
                          page_offset(page), inode->i_ino);

        ClearPageUptodate(page);
        ClearPageMappedToDisk(page);

        if (page_has_buffers(page)) {
                struct buffer_head *bh, *head;
                const unsigned long clear_bits =
                        (BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) |
                         BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) |
                         BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected));

                bh = head = page_buffers(page);
                do {
                        lock_buffer(bh);
                        if (!silent)
                                nilfs_msg(sb, KERN_WARNING,
                                          "discard dirty block: blocknr=%llu, size=%zu",
                                          (u64)bh->b_blocknr, bh->b_size);

                        set_mask_bits(&bh->b_state, clear_bits, 0);
                        unlock_buffer(bh);
                } while (bh = bh->b_this_page, bh != head);
        }

        __nilfs_clear_page_dirty(page);
}

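/*
 * nilfs_page_count_clean_buffers - count buffers without the dirty bit set
 * @page: page whose buffers are examined
 * @from: start of the byte range within the page
 * @to: end of the byte range within the page
 *
 * Returns the number of buffers overlapping the range [@from, @to) whose
 * dirty bit is not set.
 */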
unsigned int nilfs_page_count_clean_buffers(struct page *page,
                                            unsigned int from, unsigned int to)
{
        unsigned int block_start, block_end;
        struct buffer_head *bh, *head;
        unsigned int nc = 0;

        for (bh = head = page_buffers(page), block_start = 0;
             bh != head || !block_start;
             block_start = block_end, bh = bh->b_this_page) {
                block_end = block_start + bh->b_size;
                if (block_end > from && block_start < to && !buffer_dirty(bh))
                        nc++;
        }
        return nc;
}

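/*
 * nilfs_mapping_init - set up an address space for use as a NILFS page cache
 * @mapping: address space to initialize
 * @inode: inode to associate with @mapping
 *
 * Associates @mapping with @inode, restricts its page allocations to
 * GFP_NOFS, and installs empty address space operations.
 */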
void nilfs_mapping_init(struct address_space *mapping, struct inode *inode)
{
        mapping->host = inode;
        mapping->flags = 0;
        mapping_set_gfp_mask(mapping, GFP_NOFS);
        mapping->private_data = NULL;
        mapping->a_ops = &empty_aops;
}

/*
 * NILFS2 needs clear_page_dirty() in the following two cases:
 *
 * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears
 *    page dirty flags when it copies back pages from the shadow cache
 *    (gcdat->{i_mapping,i_btnode_cache}) to its original cache
 *    (dat->{i_mapping,i_btnode_cache}).
 *
 * 2) Some B-tree operations like insertion or deletion may dispose buffers
 *    in dirty state, and this needs to cancel the dirty state of their pages.
 */
int __nilfs_clear_page_dirty(struct page *page)
{
        struct address_space *mapping = page->mapping;

        if (mapping) {
                spin_lock_irq(&mapping->tree_lock);
                if (test_bit(PG_dirty, &page->flags)) {
                        radix_tree_tag_clear(&mapping->page_tree,
                                             page_index(page),
                                             PAGECACHE_TAG_DIRTY);
                        spin_unlock_irq(&mapping->tree_lock);
                        return clear_page_dirty_for_io(page);
                }
                spin_unlock_irq(&mapping->tree_lock);
                return 0;
        }
        return TestClearPageDirty(page);
}

/**
 * nilfs_find_uncommitted_extent - find extent of uncommitted data
 * @inode: inode
 * @start_blk: start block offset (in)
 * @blkoff: start offset of the found extent (out)
 *
 * This function searches an extent of buffers marked "delayed" which
 * starts from a block offset equal to or larger than @start_blk.  If
 * such an extent is found, this will store the start offset in
 * @blkoff and return its length in blocks.  Otherwise, zero is
 * returned.
 */
unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
                                            sector_t start_blk,
                                            sector_t *blkoff)
{
        unsigned int i;
        pgoff_t index;
        unsigned int nblocks_in_page;
        unsigned long length = 0;
        sector_t b;
        struct pagevec pvec;
        struct page *page;

        if (inode->i_mapping->nrpages == 0)
                return 0;

        index = start_blk >> (PAGE_SHIFT - inode->i_blkbits);
        nblocks_in_page = 1U << (PAGE_SHIFT - inode->i_blkbits);

        pagevec_init(&pvec, 0);

repeat:
        pvec.nr = find_get_pages_contig(inode->i_mapping, index, PAGEVEC_SIZE,
                                        pvec.pages);
        if (pvec.nr == 0)
                return length;

        if (length > 0 && pvec.pages[0]->index > index)
                goto out;

        /* cast before shifting to avoid losing high bits of the block number
         * when pgoff_t is narrower than sector_t (32-bit systems) */
        b = (sector_t)pvec.pages[0]->index << (PAGE_SHIFT - inode->i_blkbits);
        i = 0;
        do {
                page = pvec.pages[i];

                lock_page(page);
                if (page_has_buffers(page)) {
                        struct buffer_head *bh, *head;

                        bh = head = page_buffers(page);
                        do {
                                if (b < start_blk)
                                        continue;
                                if (buffer_delay(bh)) {
                                        if (length == 0)
                                                *blkoff = b;
                                        length++;
                                } else if (length > 0) {
                                        goto out_locked;
                                }
                        } while (++b, bh = bh->b_this_page, bh != head);
                } else {
                        if (length > 0)
                                goto out_locked;

                        b += nblocks_in_page;
                }
                unlock_page(page);

        } while (++i < pagevec_count(&pvec));

        index = page->index + 1;
        pagevec_release(&pvec);
        cond_resched();
        goto repeat;

out_locked:
        unlock_page(page);
out:
        pagevec_release(&pvec);
        return length;
}