linux/fs/gfs2/aops.c
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/fs.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/gfs2_ondisk.h>
#include <linux/backing-dev.h>
#include <linux/uio.h>
#include <trace/events/writeback.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "inode.h"
#include "log.h"
#include "meta_io.h"
#include "quota.h"
#include "trans.h"
#include "rgrp.h"
#include "super.h"
#include "util.h"
#include "glops.h"


static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
				   unsigned int from, unsigned int to)
{
	struct buffer_head *head = page_buffers(page);
	unsigned int bsize = head->b_size;
	struct buffer_head *bh;
	unsigned int start, end;

	for (bh = head, start = 0; bh != head || !start;
	     bh = bh->b_this_page, start = end) {
		end = start + bsize;
		if (end <= from || start >= to)
			continue;
		if (gfs2_is_jdata(ip))
			set_buffer_uptodate(bh);
		gfs2_trans_add_data(ip->i_gl, bh);
	}
}
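
/*
 * Worked example (illustrative only, not part of the original code): on a
 * 4KB page backed by four 1KB buffers, a call with from == 512 and
 * to == 2048 visits the buffers covering [0,1024) and [1024,2048), which
 * overlap the byte range, and skips [2048,3072) and [3072,4096), whose
 * start is >= to. Each overlapping buffer is added to the transaction.
 */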

/**
 * gfs2_get_block_noalloc - Fills in a buffer head with details about a block
 * @inode: The inode
 * @lblock: The block number to look up
 * @bh_result: The buffer head to return the result in
 * @create: Non-zero if we may add block to the file
 *
 * Returns: errno
 */

static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
				  struct buffer_head *bh_result, int create)
{
	int error;

	error = gfs2_block_map(inode, lblock, bh_result, 0);
	if (error)
		return error;
	if (!buffer_mapped(bh_result))
		return -EIO;
	return 0;
}

static int gfs2_get_block_direct(struct inode *inode, sector_t lblock,
				 struct buffer_head *bh_result, int create)
{
	return gfs2_block_map(inode, lblock, bh_result, 0);
}

/**
 * gfs2_writepage_common - Common bits of writepage
 * @page: The page to be written
 * @wbc: The writeback control
 *
 * Returns: 1 if the caller should go ahead and write the page, or 0 if
 *          it has been dealt with here (redirtied or invalidated).
 */

static int gfs2_writepage_common(struct page *page,
				 struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t i_size = i_size_read(inode);
	pgoff_t end_index = i_size >> PAGE_SHIFT;
	unsigned offset;

	if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl)))
		goto out;
	if (current->journal_info)
		goto redirty;
	/* Is the page fully outside i_size? (truncate in progress) */
	offset = i_size & (PAGE_SIZE-1);
	if (page->index > end_index || (page->index == end_index && !offset)) {
		page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
		goto out;
	}
	return 1;
redirty:
	redirty_page_for_writepage(wbc, page);
out:
	unlock_page(page);
	return 0;
}
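
/*
 * Illustrative numbers for the truncate check above, assuming 4KB pages:
 * with i_size == 10000, end_index == 2 and offset == 1808, so pages 0-2
 * are still written and only pages past index 2 are invalidated; with
 * i_size == 8192, offset == 0 and end_index == 2, so page 2 itself is
 * already fully beyond EOF and is invalidated as well.
 */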

/**
 * gfs2_writepage - Write page for writeback mappings
 * @page: The page
 * @wbc: The writeback control
 *
 * Returns: errno
 */

static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
{
	int ret;

	ret = gfs2_writepage_common(page, wbc);
	if (ret <= 0)
		return ret;

	return nobh_writepage(page, gfs2_get_block_noalloc, wbc);
}

/**
 * __gfs2_jdata_writepage - The core of jdata writepage
 * @page: The page to write
 * @wbc: The writeback control
 *
 * This is shared between writepage and writepages and implements the
 * core of the writepage operation. If a transaction is required then
 * PageChecked will have been set and the transaction will have
 * already been started before this is called.
 */

static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	if (PageChecked(page)) {
		ClearPageChecked(page);
		if (!page_has_buffers(page)) {
			create_empty_buffers(page, inode->i_sb->s_blocksize,
					     (1 << BH_Dirty)|(1 << BH_Uptodate));
		}
		gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
	}
	return block_write_full_page(page, gfs2_get_block_noalloc, wbc);
}

/**
 * gfs2_jdata_writepage - Write complete page
 * @page: Page to write
 * @wbc: The writeback control
 *
 * Returns: errno
 */

static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	int ret;
	int done_trans = 0;

	if (PageChecked(page)) {
		if (wbc->sync_mode != WB_SYNC_ALL)
			goto out_ignore;
		ret = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
		if (ret)
			goto out_ignore;
		done_trans = 1;
	}
	ret = gfs2_writepage_common(page, wbc);
	if (ret > 0)
		ret = __gfs2_jdata_writepage(page, wbc);
	if (done_trans)
		gfs2_trans_end(sdp);
	return ret;

out_ignore:
	redirty_page_for_writepage(wbc, page);
	unlock_page(page);
	return 0;
}

/**
 * gfs2_writepages - Write a bunch of dirty pages back to disk
 * @mapping: The mapping to write
 * @wbc: Write-back control
 *
 * Used for both ordered and writeback modes.
 */
static int gfs2_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
}

/**
 * gfs2_write_jdata_pagevec - Write back a pagevec's worth of pages
 * @mapping: The mapping
 * @wbc: The writeback control
 * @pvec: The vector of pages
 * @nr_pages: The number of pages to write
 * @end: End position
 * @done_index: Page index
 *
 * Returns: non-zero if loop should terminate, zero otherwise
 */

static int gfs2_write_jdata_pagevec(struct address_space *mapping,
				    struct writeback_control *wbc,
				    struct pagevec *pvec,
				    int nr_pages, pgoff_t end,
				    pgoff_t *done_index)
{
	struct inode *inode = mapping->host;
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	unsigned nrblocks = nr_pages * (PAGE_SIZE/inode->i_sb->s_blocksize);
	int i;
	int ret;

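	/*
	 * Illustrative sizing, assuming 4KB pages: with 4KB filesystem
	 * blocks, nrblocks == nr_pages, so a full pagevec reserves one
	 * journal block per page; with 1KB blocks it reserves four per
	 * page. The same figure is passed as the revoke reservation.
	 */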
	ret = gfs2_trans_begin(sdp, nrblocks, nrblocks);
	if (ret < 0)
		return ret;

	for(i = 0; i < nr_pages; i++) {
		struct page *page = pvec->pages[i];

		/*
		 * At this point, the page may be truncated or
		 * invalidated (changing page->mapping to NULL), or
		 * even swizzled back from swapper_space to tmpfs file
		 * mapping. However, page->index will not change
		 * because we have a reference on the page.
		 */
		if (page->index > end) {
			/*
			 * can't be range_cyclic (1st pass) because
			 * end == -1 in that case.
			 */
			ret = 1;
			break;
		}

		*done_index = page->index;

		lock_page(page);

		if (unlikely(page->mapping != mapping)) {
continue_unlock:
			unlock_page(page);
			continue;
		}

		if (!PageDirty(page)) {
			/* someone wrote it for us */
			goto continue_unlock;
		}

		if (PageWriteback(page)) {
			if (wbc->sync_mode != WB_SYNC_NONE)
				wait_on_page_writeback(page);
			else
				goto continue_unlock;
		}

		BUG_ON(PageWriteback(page));
		if (!clear_page_dirty_for_io(page))
			goto continue_unlock;

		trace_wbc_writepage(wbc, inode_to_bdi(inode));

		ret = __gfs2_jdata_writepage(page, wbc);
		if (unlikely(ret)) {
			if (ret == AOP_WRITEPAGE_ACTIVATE) {
				unlock_page(page);
				ret = 0;
			} else {

				/*
				 * done_index is set past this page,
				 * so media errors will not choke
				 * background writeout for the entire
				 * file. This has consequences for
				 * range_cyclic semantics (i.e. it may
				 * not be suitable for data integrity
				 * writeout).
				 */
				*done_index = page->index + 1;
				ret = 1;
				break;
			}
		}

		/*
		 * We stop writing back only if we are not doing
		 * integrity sync. In case of integrity sync we have to
		 * keep going until we have written all the pages
		 * we tagged for writeback prior to entering this loop.
		 */
		if (--wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) {
			ret = 1;
			break;
		}

	}
	gfs2_trans_end(sdp);
	return ret;
}

/**
 * gfs2_write_cache_jdata - Like write_cache_pages but for jdata
 * @mapping: The mapping to write
 * @wbc: The writeback control
 *
 * The reason that we use our own function here is that we need to
 * start transactions before we grab page locks. This allows us
 * to get the ordering right.
 */

static int gfs2_write_cache_jdata(struct address_space *mapping,
				  struct writeback_control *wbc)
{
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
	int nr_pages;
	pgoff_t uninitialized_var(writeback_index);
	pgoff_t index;
	pgoff_t end;
	pgoff_t done_index;
	int cycled;
	int range_whole = 0;
	int tag;

	pagevec_init(&pvec, 0);
	if (wbc->range_cyclic) {
		writeback_index = mapping->writeback_index; /* prev offset */
		index = writeback_index;
		if (index == 0)
			cycled = 1;
		else
			cycled = 0;
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		cycled = 1; /* ignore range_cyclic tests */
	}
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;

retry:
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	while (!done && (index <= end)) {
		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (nr_pages == 0)
			break;

		ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end, &done_index);
		if (ret)
			done = 1;
		if (ret > 0)
			ret = 0;
		pagevec_release(&pvec);
		cond_resched();
	}

	if (!cycled && !done) {
		/*
		 * range_cyclic:
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		cycled = 1;
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}

	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	return ret;
}


/**
 * gfs2_jdata_writepages - Write a bunch of dirty pages back to disk
 * @mapping: The mapping to write
 * @wbc: The writeback control
 *
 * Returns: errno
 */

static int gfs2_jdata_writepages(struct address_space *mapping,
				 struct writeback_control *wbc)
{
	struct gfs2_inode *ip = GFS2_I(mapping->host);
	struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
	int ret;

	ret = gfs2_write_cache_jdata(mapping, wbc);
	if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) {
		gfs2_log_flush(sdp, ip->i_gl, NORMAL_FLUSH);
		ret = gfs2_write_cache_jdata(mapping, wbc);
	}
	return ret;
}

/**
 * stuffed_readpage - Fill in a Linux page with stuffed file data
 * @ip: the inode
 * @page: the page
 *
 * Returns: errno
 */

static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
{
	struct buffer_head *dibh;
	u64 dsize = i_size_read(&ip->i_inode);
	void *kaddr;
	int error;

	/*
	 * Due to the order of unstuffing files and ->fault(), we can be
	 * asked for a zero page in the case of a stuffed file being extended,
	 * so we need to supply one here. It doesn't happen often.
	 */
	if (unlikely(page->index)) {
		zero_user(page, 0, PAGE_SIZE);
		SetPageUptodate(page);
		return 0;
	}

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		return error;

	kaddr = kmap_atomic(page);
	if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode)))
		dsize = (dibh->b_size - sizeof(struct gfs2_dinode));
	memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
	memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
	kunmap_atomic(kaddr);
	flush_dcache_page(page);
	brelse(dibh);
	SetPageUptodate(page);

	return 0;
}
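
/*
 * Background (illustrative note): a "stuffed" inode keeps its data in the
 * dinode block itself, immediately after struct gfs2_dinode, so at most
 * sb_bsize - sizeof(struct gfs2_dinode) bytes fit, slightly under one
 * filesystem block. That is why only page 0 can ever contain stuffed
 * data, and any other page index is simply zero-filled above.
 */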

/**
 * __gfs2_readpage - readpage
 * @file: The file to read a page for
 * @page: The page to read
 *
 * This is the core of gfs2's readpage. It's used by the internal file
 * reading code as in that case we already hold the glock. It's also
 * called by gfs2_readpage() once the required lock has been granted.
 *
 * Returns: errno
 */

static int __gfs2_readpage(void *file, struct page *page)
{
	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
	struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
	int error;

	if (gfs2_is_stuffed(ip)) {
		error = stuffed_readpage(ip, page);
		unlock_page(page);
	} else {
		error = mpage_readpage(page, gfs2_block_map);
	}

	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		return -EIO;

	return error;
}

/**
 * gfs2_readpage - read a page of a file
 * @file: The file to read
 * @page: The page of the file
 *
 * This deals with the locking required. We have to unlock and
 * relock the page in order to get the locking in the right
 * order.
 */

static int gfs2_readpage(struct file *file, struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct gfs2_inode *ip = GFS2_I(mapping->host);
	struct gfs2_holder gh;
	int error;

	unlock_page(page);
	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
	error = gfs2_glock_nq(&gh);
	if (unlikely(error))
		goto out;
	error = AOP_TRUNCATED_PAGE;
	lock_page(page);
	if (page->mapping == mapping && !PageUptodate(page))
		error = __gfs2_readpage(file, page);
	else
		unlock_page(page);
	gfs2_glock_dq(&gh);
out:
	gfs2_holder_uninit(&gh);
	if (error && error != AOP_TRUNCATED_PAGE)
		lock_page(page);
	return error;
}

/**
 * gfs2_internal_read - read an internal file
 * @ip: The gfs2 inode
 * @buf: The buffer to fill
 * @pos: The file position
 * @size: The amount to read
 *
 * Returns: The number of bytes read, or an error code
 */

int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos,
		       unsigned size)
{
	struct address_space *mapping = ip->i_inode.i_mapping;
	unsigned long index = *pos / PAGE_SIZE;
	unsigned offset = *pos & (PAGE_SIZE - 1);
	unsigned copied = 0;
	unsigned amt;
	struct page *page;
	void *p;

	do {
		amt = size - copied;
		if (offset + size > PAGE_SIZE)
			amt = PAGE_SIZE - offset;
		page = read_cache_page(mapping, index, __gfs2_readpage, NULL);
		if (IS_ERR(page))
			return PTR_ERR(page);
		p = kmap_atomic(page);
		memcpy(buf + copied, p + offset, amt);
		kunmap_atomic(p);
		put_page(page);
		copied += amt;
		index++;
		offset = 0;
	} while(copied < size);
	(*pos) += size;
	return size;
}
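
/*
 * Usage sketch (illustrative, with a hypothetical buffer size): internal
 * callers such as the rindex reading code pull the contents of metadata
 * files through the page cache with this helper, e.g.:
 *
 *	char buf[sizeof(struct gfs2_rindex)];
 *	loff_t pos = 0;
 *	int ret = gfs2_internal_read(ip, buf, &pos, sizeof(buf));
 *
 * On success the requested size is returned and *pos is advanced past
 * the bytes read; the caller is expected to hold the inode glock, since
 * __gfs2_readpage() is used directly.
 */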

/**
 * gfs2_readpages - Read a bunch of pages at once
 * @file: The file to read from
 * @mapping: Address space info
 * @pages: List of pages to read
 * @nr_pages: Number of pages to read
 *
 * Some notes:
 * 1. This is only for readahead, so we can simply ignore any things
 *    which are slightly inconvenient (such as locking conflicts between
 *    the page lock and the glock) and return having done no I/O. It's
 *    obviously not something we'd want to do on too regular a basis.
 *    Any I/O we ignore at this time will be done via readpage later.
 * 2. We don't handle stuffed files here; we let readpage do the honours.
 * 3. mpage_readpages() does most of the heavy lifting in the common case.
 * 4. gfs2_block_map() is relied upon to set BH_Boundary in the right places.
 */

static int gfs2_readpages(struct file *file, struct address_space *mapping,
			  struct list_head *pages, unsigned nr_pages)
{
	struct inode *inode = mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_holder gh;
	int ret;

	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
	ret = gfs2_glock_nq(&gh);
	if (unlikely(ret))
		goto out_uninit;
	if (!gfs2_is_stuffed(ip))
		ret = mpage_readpages(mapping, pages, nr_pages, gfs2_block_map);
	gfs2_glock_dq(&gh);
out_uninit:
	gfs2_holder_uninit(&gh);
	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		ret = -EIO;
	return ret;
}

/**
 * gfs2_write_begin - Begin to write to a file
 * @file: The file to write to
 * @mapping: The mapping in which to write
 * @pos: The file offset at which to start writing
 * @len: Length of the write
 * @flags: Various flags
 * @pagep: Pointer to return the page
 * @fsdata: Pointer to return fs data (unused by GFS2)
 *
 * Returns: errno
 */

static int gfs2_write_begin(struct file *file, struct address_space *mapping,
			    loff_t pos, unsigned len, unsigned flags,
			    struct page **pagep, void **fsdata)
{
	struct gfs2_inode *ip = GFS2_I(mapping->host);
	struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
	unsigned requested = 0;
	int alloc_required;
	int error = 0;
	pgoff_t index = pos >> PAGE_SHIFT;
	unsigned from = pos & (PAGE_SIZE - 1);
	struct page *page;

	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
	error = gfs2_glock_nq(&ip->i_gh);
	if (unlikely(error))
		goto out_uninit;
	if (&ip->i_inode == sdp->sd_rindex) {
		error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
					   GL_NOCACHE, &m_ip->i_gh);
		if (unlikely(error)) {
			gfs2_glock_dq(&ip->i_gh);
			goto out_uninit;
		}
	}

	alloc_required = gfs2_write_alloc_required(ip, pos, len);

	if (alloc_required || gfs2_is_jdata(ip))
		gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);

	if (alloc_required) {
		struct gfs2_alloc_parms ap = { .aflags = 0, };
		requested = data_blocks + ind_blocks;
		ap.target = requested;
		error = gfs2_quota_lock_check(ip, &ap);
		if (error)
			goto out_unlock;

		error = gfs2_inplace_reserve(ip, &ap);
		if (error)
			goto out_qunlock;
	}

	rblocks = RES_DINODE + ind_blocks;
	if (gfs2_is_jdata(ip))
		rblocks += data_blocks ? data_blocks : 1;
	if (ind_blocks || data_blocks)
		rblocks += RES_STATFS + RES_QUOTA;
	if (&ip->i_inode == sdp->sd_rindex)
		rblocks += 2 * RES_STATFS;
	if (alloc_required)
		rblocks += gfs2_rg_blocks(ip, requested);

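	/*
	 * Illustrative reservation, assuming 4KB blocks and a one-block
	 * jdata write that needs allocation: rblocks is RES_DINODE +
	 * ind_blocks + 1 journaled data block + RES_STATFS + RES_QUOTA +
	 * the rgrp bitmap blocks from gfs2_rg_blocks(). Ordered and
	 * writeback inodes omit the data block term, as their data is
	 * not journaled.
	 */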
	error = gfs2_trans_begin(sdp, rblocks,
				 PAGE_SIZE/sdp->sd_sb.sb_bsize);
	if (error)
		goto out_trans_fail;

	error = -ENOMEM;
	flags |= AOP_FLAG_NOFS;
	page = grab_cache_page_write_begin(mapping, index, flags);
	*pagep = page;
	if (unlikely(!page))
		goto out_endtrans;

	if (gfs2_is_stuffed(ip)) {
		error = 0;
		if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
			error = gfs2_unstuff_dinode(ip, page);
			if (error == 0)
				goto prepare_write;
		} else if (!PageUptodate(page)) {
			error = stuffed_readpage(ip, page);
		}
		goto out;
	}

prepare_write:
	error = __block_write_begin(page, from, len, gfs2_block_map);
out:
	if (error == 0)
		return 0;

	unlock_page(page);
	put_page(page);

	gfs2_trans_end(sdp);
	if (pos + len > ip->i_inode.i_size)
		gfs2_trim_blocks(&ip->i_inode);
	goto out_trans_fail;

out_endtrans:
	gfs2_trans_end(sdp);
out_trans_fail:
	if (alloc_required) {
		gfs2_inplace_release(ip);
out_qunlock:
		gfs2_quota_unlock(ip);
	}
out_unlock:
	if (&ip->i_inode == sdp->sd_rindex) {
		gfs2_glock_dq(&m_ip->i_gh);
		gfs2_holder_uninit(&m_ip->i_gh);
	}
	gfs2_glock_dq(&ip->i_gh);
out_uninit:
	gfs2_holder_uninit(&ip->i_gh);
	return error;
}

/**
 * adjust_fs_space - Adjusts the free space available due to gfs2_grow
 * @inode: the rindex inode
 */
static void adjust_fs_space(struct inode *inode)
{
	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
	struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
	struct buffer_head *m_bh, *l_bh;
	u64 fs_total, new_free;

	/* Total up the file system space, according to the latest rindex. */
	fs_total = gfs2_ri_total(sdp);
	if (gfs2_meta_inode_buffer(m_ip, &m_bh) != 0)
		return;

	spin_lock(&sdp->sd_statfs_spin);
	gfs2_statfs_change_in(m_sc, m_bh->b_data +
			      sizeof(struct gfs2_dinode));
	if (fs_total > (m_sc->sc_total + l_sc->sc_total))
		new_free = fs_total - (m_sc->sc_total + l_sc->sc_total);
	else
		new_free = 0;
	spin_unlock(&sdp->sd_statfs_spin);
	fs_warn(sdp, "File system extended by %llu blocks.\n",
		(unsigned long long)new_free);
	gfs2_statfs_change(sdp, new_free, new_free, 0);

	if (gfs2_meta_inode_buffer(l_ip, &l_bh) != 0)
		goto out;
	update_statfs(sdp, m_bh, l_bh);
	brelse(l_bh);
out:
	brelse(m_bh);
}

/**
 * gfs2_stuffed_write_end - Write end for stuffed files
 * @inode: The inode
 * @dibh: The buffer_head containing the on-disk inode
 * @pos: The file position
 * @len: The length of the write
 * @copied: How much was actually copied by the VFS
 * @page: The page
 *
 * This copies the data from the page into the inode block after
 * the inode data structure itself.
 *
 * Returns: errno
 */
static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
				  loff_t pos, unsigned len, unsigned copied,
				  struct page *page)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
	u64 to = pos + copied;
	void *kaddr;
	unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);

	BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode)));
	kaddr = kmap_atomic(page);
	memcpy(buf + pos, kaddr + pos, copied);
	memset(kaddr + pos + copied, 0, len - copied);
	flush_dcache_page(page);
	kunmap_atomic(kaddr);

	if (!PageUptodate(page))
		SetPageUptodate(page);
	unlock_page(page);
	put_page(page);

	if (copied) {
		if (inode->i_size < to)
			i_size_write(inode, to);
		mark_inode_dirty(inode);
	}

	if (inode == sdp->sd_rindex) {
		adjust_fs_space(inode);
		sdp->sd_rindex_uptodate = 0;
	}

	brelse(dibh);
	gfs2_trans_end(sdp);
	if (inode == sdp->sd_rindex) {
		gfs2_glock_dq(&m_ip->i_gh);
		gfs2_holder_uninit(&m_ip->i_gh);
	}
	gfs2_glock_dq(&ip->i_gh);
	gfs2_holder_uninit(&ip->i_gh);
	return copied;
}

/**
 * gfs2_write_end
 * @file: The file to write to
 * @mapping: The address space to write to
 * @pos: The file position
 * @len: The length of the data
 * @copied: How much was actually copied by the VFS
 * @page: The page that has been written
 * @fsdata: The fsdata (unused in GFS2)
 *
 * The main write_end function for GFS2. We have a separate one for
 * stuffed files as they are slightly different; otherwise we just
 * put our locking around the VFS provided functions.
 *
 * Returns: errno
 */

static int gfs2_write_end(struct file *file, struct address_space *mapping,
			  loff_t pos, unsigned len, unsigned copied,
			  struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
	struct buffer_head *dibh;
	unsigned int from = pos & (PAGE_SIZE - 1);
	unsigned int to = from + len;
	int ret;
	struct gfs2_trans *tr = current->journal_info;
	BUG_ON(!tr);

	BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL);

	ret = gfs2_meta_inode_buffer(ip, &dibh);
	if (unlikely(ret)) {
		unlock_page(page);
		put_page(page);
		goto failed;
	}

	if (gfs2_is_stuffed(ip))
		return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page);

	if (!gfs2_is_writeback(ip))
		gfs2_page_add_databufs(ip, page, from, to);

	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
	if (tr->tr_num_buf_new)
		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
	else
		gfs2_trans_add_meta(ip->i_gl, dibh);

	if (inode == sdp->sd_rindex) {
		adjust_fs_space(inode);
		sdp->sd_rindex_uptodate = 0;
	}

	brelse(dibh);
failed:
	gfs2_trans_end(sdp);
	gfs2_inplace_release(ip);
	if (ip->i_qadata && ip->i_qadata->qa_qd_num)
		gfs2_quota_unlock(ip);
	if (inode == sdp->sd_rindex) {
		gfs2_glock_dq(&m_ip->i_gh);
		gfs2_holder_uninit(&m_ip->i_gh);
	}
	gfs2_glock_dq(&ip->i_gh);
	gfs2_holder_uninit(&ip->i_gh);
	return ret;
}

/**
 * gfs2_set_page_dirty - Page dirtying function
 * @page: The page to dirty
 *
 * Returns: 1 if it dirtied the page, or 0 otherwise
 */

static int gfs2_set_page_dirty(struct page *page)
{
	SetPageChecked(page);
	return __set_page_dirty_buffers(page);
}

/**
 * gfs2_bmap - Block map function
 * @mapping: Address space info
 * @lblock: The block to map
 *
 * Returns: The disk address for the block or 0 on hole or error
 */

static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
{
	struct gfs2_inode *ip = GFS2_I(mapping->host);
	struct gfs2_holder i_gh;
	sector_t dblock = 0;
	int error;

	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
	if (error)
		return 0;

	if (!gfs2_is_stuffed(ip))
		dblock = generic_block_bmap(mapping, lblock, gfs2_block_map);

	gfs2_glock_dq_uninit(&i_gh);

	return dblock;
}
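
/*
 * Usage sketch (illustrative, from userspace): ->bmap is what serves the
 * FIBMAP ioctl, so assuming an open fd on a non-stuffed GFS2 file:
 *
 *	int blk = 0;			// logical block to query
 *	ioctl(fd, FIBMAP, &blk);	// blk now holds the disk address,
 *					// or 0 for a hole or stuffed file
 */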

static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
	struct gfs2_bufdata *bd;

	lock_buffer(bh);
	gfs2_log_lock(sdp);
	clear_buffer_dirty(bh);
	bd = bh->b_private;
	if (bd) {
		if (!list_empty(&bd->bd_list) && !buffer_pinned(bh))
			list_del_init(&bd->bd_list);
		else
			gfs2_remove_from_journal(bh, current->journal_info, 0);
	}
	bh->b_bdev = NULL;
	clear_buffer_mapped(bh);
	clear_buffer_req(bh);
	clear_buffer_new(bh);
	gfs2_log_unlock(sdp);
	unlock_buffer(bh);
}

static void gfs2_invalidatepage(struct page *page, unsigned int offset,
				unsigned int length)
{
	struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
	unsigned int stop = offset + length;
	int partial_page = (offset || length < PAGE_SIZE);
	struct buffer_head *bh, *head;
	unsigned long pos = 0;

	BUG_ON(!PageLocked(page));
	if (!partial_page)
		ClearPageChecked(page);
	if (!page_has_buffers(page))
		goto out;

	bh = head = page_buffers(page);
	do {
		if (pos + bh->b_size > stop)
			return;

		if (offset <= pos)
			gfs2_discard(sdp, bh);
		pos += bh->b_size;
		bh = bh->b_this_page;
	} while (bh != head);
out:
	if (!partial_page)
		try_to_release_page(page, 0);
}
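
/*
 * Worked example (illustrative only): invalidating offset == 1024,
 * length == 2048 on a 4KB page with 1KB buffers gives stop == 3072, so
 * the buffers covering [1024,2048) and [2048,3072) are discarded, the
 * buffer at [0,1024) is skipped (offset > pos), and the walk returns
 * before reaching [3072,4096). As a partial invalidation, PageChecked
 * is left untouched and the page is not released.
 */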

/**
 * gfs2_ok_for_dio - check that dio is valid on this file
 * @ip: The inode
 * @offset: The offset at which we are reading or writing
 *
 * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
 *          1 (to accept the i/o request)
 */
static int gfs2_ok_for_dio(struct gfs2_inode *ip, loff_t offset)
{
	/*
	 * Should we return an error here? I can't see that O_DIRECT for
	 * a stuffed file makes any sense. For now we'll silently fall
	 * back to buffered I/O
	 */
	if (gfs2_is_stuffed(ip))
		return 0;

	if (offset >= i_size_read(&ip->i_inode))
		return 0;
	return 1;
}


static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
			      loff_t offset)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct address_space *mapping = inode->i_mapping;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder gh;
	int rv;

	/*
	 * Deferred lock, even if it's a write, since we do no allocation
	 * on this path. All we need change is atime, and this lock mode
	 * ensures that other nodes have flushed their buffered read caches
	 * (i.e. their page cache entries for this inode). We do not,
	 * unfortunately, have the option of only flushing a range like
	 * the VFS does.
	 */
	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
	rv = gfs2_glock_nq(&gh);
	if (rv)
		return rv;
	rv = gfs2_ok_for_dio(ip, offset);
	if (rv != 1)
		goto out; /* dio not valid, fall back to buffered i/o */

	/*
	 * Now since we are holding a deferred (CW) lock at this point, you
	 * might be wondering why this is ever needed. There is a case however
	 * where we've granted a deferred local lock against a cached exclusive
	 * glock. That is ok provided all granted local locks are deferred, but
	 * it also means that it is possible to encounter pages which are
	 * cached and possibly also mapped. So here we check for that and sort
	 * them out ahead of the dio. The glock state machine will take care of
	 * everything else.
	 *
	 * If in fact the cached glock state (gl->gl_state) is deferred (CW) in
	 * the first place, mapping->nrpages will always be zero.
	 */
	if (mapping->nrpages) {
		loff_t lstart = offset & ~(PAGE_SIZE - 1);
		loff_t len = iov_iter_count(iter);
		loff_t end = PAGE_ALIGN(offset + len) - 1;

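		/*
		 * Illustrative range math, assuming 4KB pages: for
		 * offset == 5000 and len == 3000, lstart == 4096 and
		 * end == 8191, so the flush and truncate below cover
		 * every page the direct i/o will touch.
		 */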
		rv = 0;
		if (len == 0)
			goto out;
		if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
			unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len);
		rv = filemap_write_and_wait_range(mapping, lstart, end);
		if (rv)
			goto out;
		if (iov_iter_rw(iter) == WRITE)
			truncate_inode_pages_range(mapping, lstart, end);
	}

	rv = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
				  offset, gfs2_get_block_direct, NULL, NULL, 0);
out:
	gfs2_glock_dq(&gh);
	gfs2_holder_uninit(&gh);
	return rv;
}

/**
 * gfs2_releasepage - free the metadata associated with a page
 * @page: the page that's being released
 * @gfp_mask: passed from Linux VFS, ignored by us
 *
 * Call try_to_free_buffers() if the buffers in this page can be
 * released.
 *
 * Returns: 1 if the buffers were released, 0 otherwise
 */

int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
{
	struct address_space *mapping = page->mapping;
	struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
	struct buffer_head *bh, *head;
	struct gfs2_bufdata *bd;

	if (!page_has_buffers(page))
		return 0;

	gfs2_log_lock(sdp);
	spin_lock(&sdp->sd_ail_lock);
	head = bh = page_buffers(page);
	do {
		if (atomic_read(&bh->b_count))
			goto cannot_release;
		bd = bh->b_private;
		if (bd && bd->bd_tr)
			goto cannot_release;
		if (buffer_pinned(bh) || buffer_dirty(bh))
			goto not_possible;
		bh = bh->b_this_page;
	} while(bh != head);
	spin_unlock(&sdp->sd_ail_lock);

	head = bh = page_buffers(page);
	do {
		bd = bh->b_private;
		if (bd) {
			gfs2_assert_warn(sdp, bd->bd_bh == bh);
			if (!list_empty(&bd->bd_list))
				list_del_init(&bd->bd_list);
			bd->bd_bh = NULL;
			bh->b_private = NULL;
			kmem_cache_free(gfs2_bufdata_cachep, bd);
		}

		bh = bh->b_this_page;
	} while (bh != head);
	gfs2_log_unlock(sdp);

	return try_to_free_buffers(page);

not_possible: /* Should never happen */
	WARN_ON(buffer_dirty(bh));
	WARN_ON(buffer_pinned(bh));
cannot_release:
	spin_unlock(&sdp->sd_ail_lock);
	gfs2_log_unlock(sdp);
	return 0;
}

static const struct address_space_operations gfs2_writeback_aops = {
	.writepage = gfs2_writepage,
	.writepages = gfs2_writepages,
	.readpage = gfs2_readpage,
	.readpages = gfs2_readpages,
	.write_begin = gfs2_write_begin,
	.write_end = gfs2_write_end,
	.bmap = gfs2_bmap,
	.invalidatepage = gfs2_invalidatepage,
	.releasepage = gfs2_releasepage,
	.direct_IO = gfs2_direct_IO,
	.migratepage = buffer_migrate_page,
	.is_partially_uptodate = block_is_partially_uptodate,
	.error_remove_page = generic_error_remove_page,
};

static const struct address_space_operations gfs2_ordered_aops = {
	.writepage = gfs2_writepage,
	.writepages = gfs2_writepages,
	.readpage = gfs2_readpage,
	.readpages = gfs2_readpages,
	.write_begin = gfs2_write_begin,
	.write_end = gfs2_write_end,
	.set_page_dirty = gfs2_set_page_dirty,
	.bmap = gfs2_bmap,
	.invalidatepage = gfs2_invalidatepage,
	.releasepage = gfs2_releasepage,
	.direct_IO = gfs2_direct_IO,
	.migratepage = buffer_migrate_page,
	.is_partially_uptodate = block_is_partially_uptodate,
	.error_remove_page = generic_error_remove_page,
};

static const struct address_space_operations gfs2_jdata_aops = {
	.writepage = gfs2_jdata_writepage,
	.writepages = gfs2_jdata_writepages,
	.readpage = gfs2_readpage,
	.readpages = gfs2_readpages,
	.write_begin = gfs2_write_begin,
	.write_end = gfs2_write_end,
	.set_page_dirty = gfs2_set_page_dirty,
	.bmap = gfs2_bmap,
	.invalidatepage = gfs2_invalidatepage,
	.releasepage = gfs2_releasepage,
	.is_partially_uptodate = block_is_partially_uptodate,
	.error_remove_page = generic_error_remove_page,
};

void gfs2_set_aops(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);

	if (gfs2_is_writeback(ip))
		inode->i_mapping->a_ops = &gfs2_writeback_aops;
	else if (gfs2_is_ordered(ip))
		inode->i_mapping->a_ops = &gfs2_ordered_aops;
	else if (gfs2_is_jdata(ip))
		inode->i_mapping->a_ops = &gfs2_jdata_aops;
	else
		BUG();
}