linux/fs/nilfs2/page.c
/*
 * page.c - buffer/page management specific to NILFS
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * Written by Ryusuke Konishi and Seiji Kihara.
 */

#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/bitops.h>
#include <linux/page-flags.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/pagevec.h>
#include <linux/gfp.h>
#include "nilfs.h"
#include "page.h"
#include "mdt.h"


#define NILFS_BUFFER_INHERENT_BITS					\
	(BIT(BH_Uptodate) | BIT(BH_Mapped) | BIT(BH_NILFS_Node) |	\
	 BIT(BH_NILFS_Volatile) | BIT(BH_NILFS_Checked))

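/*
 * __nilfs_get_page_block - return the buffer head covering a given block
 *
 * Creates empty buffers on @page if it has none, looks up the buffer
 * head covering block number @block, touches it, and waits until it is
 * no longer locked before returning it.
 */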
static struct buffer_head *
__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
		       int blkbits, unsigned long b_state)
{
	unsigned long first_block;
	struct buffer_head *bh;

	if (!page_has_buffers(page))
		create_empty_buffers(page, 1 << blkbits, b_state);

	first_block = (unsigned long)index << (PAGE_SHIFT - blkbits);
	bh = nilfs_page_get_nth_block(page, block - first_block);

	touch_buffer(bh);
	wait_on_buffer(bh);
	return bh;
}

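/**
 * nilfs_grab_buffer - return the buffer head at a given block offset
 * @inode: owner inode of the block
 * @mapping: page cache in which the block is looked up
 * @blkoff: block offset (file block number)
 * @b_state: buffer state bits used if buffers have to be created
 *
 * Returns the buffer head covering @blkoff with its page locked, or
 * NULL if the page could not be obtained from the page cache.
 */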
struct buffer_head *nilfs_grab_buffer(struct inode *inode,
				      struct address_space *mapping,
				      unsigned long blkoff,
				      unsigned long b_state)
{
	int blkbits = inode->i_blkbits;
	pgoff_t index = blkoff >> (PAGE_SHIFT - blkbits);
	struct page *page;
	struct buffer_head *bh;

	page = grab_cache_page(mapping, index);
	if (unlikely(!page))
		return NULL;

	bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state);
	if (unlikely(!bh)) {
		unlock_page(page);
		put_page(page);
		return NULL;
	}
	return bh;
}

/**
 * nilfs_forget_buffer - discard dirty state
 * @bh: buffer head of the buffer to be discarded
 */
void nilfs_forget_buffer(struct buffer_head *bh)
{
	struct page *page = bh->b_page;
	const unsigned long clear_bits =
		(BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) |
		 BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) |
		 BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected));

	lock_buffer(bh);
	set_mask_bits(&bh->b_state, clear_bits, 0);
	if (nilfs_page_buffers_clean(page))
		__nilfs_clear_page_dirty(page);

	bh->b_blocknr = -1;
	ClearPageUptodate(page);
	ClearPageMappedToDisk(page);
	unlock_buffer(bh);
	brelse(bh);
}

/**
 * nilfs_copy_buffer - copy buffer data and flags
 * @dbh: destination buffer
 * @sbh: source buffer
 */
void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
{
	void *kaddr0, *kaddr1;
	unsigned long bits;
	struct page *spage = sbh->b_page, *dpage = dbh->b_page;
	struct buffer_head *bh;

	kaddr0 = kmap_atomic(spage);
	kaddr1 = kmap_atomic(dpage);
	memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
	kunmap_atomic(kaddr1);
	kunmap_atomic(kaddr0);

	dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
	dbh->b_blocknr = sbh->b_blocknr;
	dbh->b_bdev = sbh->b_bdev;

	bh = dbh;
	bits = sbh->b_state & (BIT(BH_Uptodate) | BIT(BH_Mapped));
	while ((bh = bh->b_this_page) != dbh) {
		lock_buffer(bh);
		bits &= bh->b_state;
		unlock_buffer(bh);
	}
	if (bits & BIT(BH_Uptodate))
		SetPageUptodate(dpage);
	else
		ClearPageUptodate(dpage);
	if (bits & BIT(BH_Mapped))
		SetPageMappedToDisk(dpage);
	else
		ClearPageMappedToDisk(dpage);
}

/**
 * nilfs_page_buffers_clean - check if a page has dirty buffers or not.
 * @page: page to be checked
 *
 * nilfs_page_buffers_clean() returns zero if the page has dirty buffers.
 * Otherwise, it returns a non-zero value.
 */
int nilfs_page_buffers_clean(struct page *page)
{
	struct buffer_head *bh, *head;

	bh = head = page_buffers(page);
	do {
		if (buffer_dirty(bh))
			return 0;
		bh = bh->b_this_page;
	} while (bh != head);
	return 1;
}

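/*
 * nilfs_page_bug - dump diagnostic information about a broken page
 *
 * Prints the page reference count, index, flags, and owner inode, plus
 * the state of every attached buffer head, to the kernel log at
 * KERN_CRIT level.  Called when an inconsistent page is detected.
 */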
void nilfs_page_bug(struct page *page)
{
	struct address_space *m;
	unsigned long ino;

	if (unlikely(!page)) {
		printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
		return;
	}

	m = page->mapping;
	ino = m ? m->host->i_ino : 0;

	printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
	       "mapping=%p ino=%lu\n",
	       page, page_ref_count(page),
	       (unsigned long long)page->index, page->flags, m, ino);

	if (page_has_buffers(page)) {
		struct buffer_head *bh, *head;
		int i = 0;

		bh = head = page_buffers(page);
		do {
			printk(KERN_CRIT
			       " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n",
			       i++, bh, atomic_read(&bh->b_count),
			       (unsigned long long)bh->b_blocknr, bh->b_state);
			bh = bh->b_this_page;
		} while (bh != head);
	}
}

/**
 * nilfs_copy_page - copy the page with buffers
 * @dst: destination page
 * @src: source page
 * @copy_dirty: flag whether to copy dirty states on the page's buffer heads.
 *
 * This function is for both data pages and btnode pages.  The dirty flag
 * must be handled by the caller.  The page must not be under I/O.
 * Both src and dst pages must be locked.
 */
static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
{
	struct buffer_head *dbh, *dbufs, *sbh, *sbufs;
	unsigned long mask = NILFS_BUFFER_INHERENT_BITS;

	BUG_ON(PageWriteback(dst));

	sbh = sbufs = page_buffers(src);
	if (!page_has_buffers(dst))
		create_empty_buffers(dst, sbh->b_size, 0);

	if (copy_dirty)
		mask |= BIT(BH_Dirty);

	dbh = dbufs = page_buffers(dst);
	do {
		lock_buffer(sbh);
		lock_buffer(dbh);
		dbh->b_state = sbh->b_state & mask;
		dbh->b_blocknr = sbh->b_blocknr;
		dbh->b_bdev = sbh->b_bdev;
		sbh = sbh->b_this_page;
		dbh = dbh->b_this_page;
	} while (dbh != dbufs);

	copy_highpage(dst, src);

	if (PageUptodate(src) && !PageUptodate(dst))
		SetPageUptodate(dst);
	else if (!PageUptodate(src) && PageUptodate(dst))
		ClearPageUptodate(dst);
	if (PageMappedToDisk(src) && !PageMappedToDisk(dst))
		SetPageMappedToDisk(dst);
	else if (!PageMappedToDisk(src) && PageMappedToDisk(dst))
		ClearPageMappedToDisk(dst);

	do {
		unlock_buffer(sbh);
		unlock_buffer(dbh);
		sbh = sbh->b_this_page;
		dbh = dbh->b_this_page;
	} while (dbh != dbufs);
}

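/**
 * nilfs_copy_dirty_pages - copy dirty pages from one page cache to another
 * @dmap: destination page cache
 * @smap: source page cache
 *
 * Copies every dirty page found in @smap, together with its buffers, to a
 * page grabbed at the same index in @dmap, and marks the copy dirty.
 *
 * Returns zero on success, or -ENOMEM if a destination page could not be
 * allocated.
 */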
int nilfs_copy_dirty_pages(struct address_space *dmap,
			   struct address_space *smap)
{
	struct pagevec pvec;
	unsigned int i;
	pgoff_t index = 0;
	int err = 0;

	pagevec_init(&pvec);
repeat:
	if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY))
		return 0;

	for (i = 0; i < pagevec_count(&pvec); i++) {
		struct page *page = pvec.pages[i], *dpage;

		lock_page(page);
		if (unlikely(!PageDirty(page)))
			NILFS_PAGE_BUG(page, "inconsistent dirty state");

		dpage = grab_cache_page(dmap, page->index);
		if (unlikely(!dpage)) {
			/* No empty page is added to the page cache */
			err = -ENOMEM;
			unlock_page(page);
			break;
		}
		if (unlikely(!page_has_buffers(page)))
			NILFS_PAGE_BUG(page,
				       "found empty page in dat page cache");

		nilfs_copy_page(dpage, page, 1);
		__set_page_dirty_nobuffers(dpage);

		unlock_page(dpage);
		put_page(dpage);
		unlock_page(page);
	}
	pagevec_release(&pvec);
	cond_resched();

	if (likely(!err))
		goto repeat;
	return err;
}

/**
 * nilfs_copy_back_pages - copy back pages to original cache from shadow cache
 * @dmap: destination page cache
 * @smap: source page cache
 *
 * No pages must be added to the cache while this copy is in progress.
 * This must be ensured by the caller.
 */
void nilfs_copy_back_pages(struct address_space *dmap,
			   struct address_space *smap)
{
	struct pagevec pvec;
	unsigned int i, n;
	pgoff_t index = 0;
	int err;

	pagevec_init(&pvec);
repeat:
	n = pagevec_lookup(&pvec, smap, &index);
	if (!n)
		return;

	for (i = 0; i < pagevec_count(&pvec); i++) {
		struct page *page = pvec.pages[i], *dpage;
		pgoff_t offset = page->index;

		lock_page(page);
		dpage = find_lock_page(dmap, offset);
		if (dpage) {
			/* overwrite the existing page in the destination cache */
			WARN_ON(PageDirty(dpage));
			nilfs_copy_page(dpage, page, 0);
			unlock_page(dpage);
			put_page(dpage);
		} else {
			struct page *page2;

			/* move the page to the destination cache */
			xa_lock_irq(&smap->i_pages);
			page2 = radix_tree_delete(&smap->i_pages, offset);
			WARN_ON(page2 != page);

			smap->nrpages--;
			xa_unlock_irq(&smap->i_pages);

			xa_lock_irq(&dmap->i_pages);
			err = radix_tree_insert(&dmap->i_pages, offset, page);
			if (unlikely(err < 0)) {
				WARN_ON(err == -EEXIST);
				page->mapping = NULL;
				put_page(page); /* for cache */
			} else {
				page->mapping = dmap;
				dmap->nrpages++;
				if (PageDirty(page))
					radix_tree_tag_set(&dmap->i_pages,
							   offset,
							   PAGECACHE_TAG_DIRTY);
			}
			xa_unlock_irq(&dmap->i_pages);
		}
		unlock_page(page);
	}
	pagevec_release(&pvec);
	cond_resched();

	goto repeat;
}

/**
 * nilfs_clear_dirty_pages - discard dirty pages in address space
 * @mapping: address space with dirty pages for discarding
 * @silent: suppress [true] or print [false] warning messages
 */
void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent)
{
	struct pagevec pvec;
	unsigned int i;
	pgoff_t index = 0;

	pagevec_init(&pvec);

	while (pagevec_lookup_tag(&pvec, mapping, &index,
					PAGECACHE_TAG_DIRTY)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			lock_page(page);
			nilfs_clear_dirty_page(page, silent);
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}
}

/**
 * nilfs_clear_dirty_page - discard dirty page
 * @page: dirty page that will be discarded
 * @silent: suppress [true] or print [false] warning messages
 */
void nilfs_clear_dirty_page(struct page *page, bool silent)
{
	struct inode *inode = page->mapping->host;
	struct super_block *sb = inode->i_sb;

	BUG_ON(!PageLocked(page));

	if (!silent)
		nilfs_msg(sb, KERN_WARNING,
			  "discard dirty page: offset=%lld, ino=%lu",
			  page_offset(page), inode->i_ino);

	ClearPageUptodate(page);
	ClearPageMappedToDisk(page);

	if (page_has_buffers(page)) {
		struct buffer_head *bh, *head;
		const unsigned long clear_bits =
			(BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) |
			 BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) |
			 BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected));

		bh = head = page_buffers(page);
		do {
			lock_buffer(bh);
			if (!silent)
				nilfs_msg(sb, KERN_WARNING,
					  "discard dirty block: blocknr=%llu, size=%zu",
					  (u64)bh->b_blocknr, bh->b_size);

			set_mask_bits(&bh->b_state, clear_bits, 0);
			unlock_buffer(bh);
		} while (bh = bh->b_this_page, bh != head);
	}

	__nilfs_clear_page_dirty(page);
}

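/**
 * nilfs_page_count_clean_buffers - count clean buffers in a byte range
 * @page: page to be scanned
 * @from: start offset within the page, in bytes
 * @to: end offset within the page, in bytes
 *
 * Returns the number of non-dirty buffers on @page that overlap the
 * byte range from @from to @to.
 */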
unsigned int nilfs_page_count_clean_buffers(struct page *page,
					    unsigned int from, unsigned int to)
{
	unsigned int block_start, block_end;
	struct buffer_head *bh, *head;
	unsigned int nc = 0;

	for (bh = head = page_buffers(page), block_start = 0;
	     bh != head || !block_start;
	     block_start = block_end, bh = bh->b_this_page) {
		block_end = block_start + bh->b_size;
		if (block_end > from && block_start < to && !buffer_dirty(bh))
			nc++;
	}
	return nc;
}

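/**
 * nilfs_mapping_init - initialize an address space used by NILFS
 * @mapping: address space to initialize
 * @inode: inode that owns @mapping
 *
 * Sets up @mapping with @inode as its host, a GFP_NOFS allocation mask,
 * no private data, and the empty set of address space operations.
 */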
void nilfs_mapping_init(struct address_space *mapping, struct inode *inode)
{
	mapping->host = inode;
	mapping->flags = 0;
	mapping_set_gfp_mask(mapping, GFP_NOFS);
	mapping->private_data = NULL;
	mapping->a_ops = &empty_aops;
}

/*
 * NILFS2 needs clear_page_dirty() in the following two cases:
 *
 * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears
 *    page dirty flags when it copies back pages from the shadow cache
 *    (gcdat->{i_mapping,i_btnode_cache}) to its original cache
 *    (dat->{i_mapping,i_btnode_cache}).
 *
 * 2) Some B-tree operations like insertion or deletion may dispose buffers
 *    in dirty state, and this needs to cancel the dirty state of their pages.
 */
int __nilfs_clear_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;

	if (mapping) {
		xa_lock_irq(&mapping->i_pages);
		if (test_bit(PG_dirty, &page->flags)) {
			radix_tree_tag_clear(&mapping->i_pages,
					     page_index(page),
					     PAGECACHE_TAG_DIRTY);
			xa_unlock_irq(&mapping->i_pages);
			return clear_page_dirty_for_io(page);
		}
		xa_unlock_irq(&mapping->i_pages);
		return 0;
	}
	return TestClearPageDirty(page);
}

/**
 * nilfs_find_uncommitted_extent - find extent of uncommitted data
 * @inode: inode
 * @start_blk: start block offset (in)
 * @blkoff: start offset of the found extent (out)
 *
 * This function searches an extent of buffers marked "delayed" which
 * starts from a block offset equal to or larger than @start_blk.  If
 * such an extent is found, this function stores the start offset in
 * @blkoff and returns its length in blocks.  Otherwise, zero is
 * returned.
 */
unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
					    sector_t start_blk,
					    sector_t *blkoff)
{
	unsigned int i;
	pgoff_t index;
	unsigned int nblocks_in_page;
	unsigned long length = 0;
	sector_t b;
	struct pagevec pvec;
	struct page *page;

	if (inode->i_mapping->nrpages == 0)
		return 0;

	index = start_blk >> (PAGE_SHIFT - inode->i_blkbits);
	nblocks_in_page = 1U << (PAGE_SHIFT - inode->i_blkbits);

	pagevec_init(&pvec);

repeat:
	pvec.nr = find_get_pages_contig(inode->i_mapping, index, PAGEVEC_SIZE,
					pvec.pages);
	if (pvec.nr == 0)
		return length;

	if (length > 0 && pvec.pages[0]->index > index)
		goto out;

	b = pvec.pages[0]->index << (PAGE_SHIFT - inode->i_blkbits);
	i = 0;
	do {
		page = pvec.pages[i];

		lock_page(page);
		if (page_has_buffers(page)) {
			struct buffer_head *bh, *head;

			bh = head = page_buffers(page);
			do {
				if (b < start_blk)
					continue;
				if (buffer_delay(bh)) {
					if (length == 0)
						*blkoff = b;
					length++;
				} else if (length > 0) {
					goto out_locked;
				}
			} while (++b, bh = bh->b_this_page, bh != head);
		} else {
			if (length > 0)
				goto out_locked;

			b += nblocks_in_page;
		}
		unlock_page(page);

	} while (++i < pagevec_count(&pvec));

	index = page->index + 1;
	pagevec_release(&pvec);
	cond_resched();
	goto repeat;

out_locked:
	unlock_page(page);
out:
	pagevec_release(&pvec);
	return length;
}