linux/fs/gfs2/aops.c
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/fs.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/gfs2_ondisk.h>
#include <linux/backing-dev.h>
#include <linux/aio.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "inode.h"
#include "log.h"
#include "meta_io.h"
#include "quota.h"
#include "trans.h"
#include "rgrp.h"
#include "super.h"
#include "util.h"
#include "glops.h"

static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
                                   unsigned int from, unsigned int to)
{
        struct buffer_head *head = page_buffers(page);
        unsigned int bsize = head->b_size;
        struct buffer_head *bh;
        unsigned int start, end;

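        /*
         * A page's buffer_heads form a circular list, so walk until we
         * arrive back at the head. Only buffers overlapping the byte
         * range [from, to) are added to the current transaction.
         */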
        for (bh = head, start = 0; bh != head || !start;
             bh = bh->b_this_page, start = end) {
                end = start + bsize;
                if (end <= from || start >= to)
                        continue;
                if (gfs2_is_jdata(ip))
                        set_buffer_uptodate(bh);
                gfs2_trans_add_data(ip->i_gl, bh);
        }
}

/**
 * gfs2_get_block_noalloc - Fills in a buffer head with details about a block
 * @inode: The inode
 * @lblock: The block number to look up
 * @bh_result: The buffer head to return the result in
 * @create: Non-zero if we may add block to the file
 *
 * Returns: errno
 */

static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
                                  struct buffer_head *bh_result, int create)
{
        int error;

        error = gfs2_block_map(inode, lblock, bh_result, 0);
        if (error)
                return error;
        if (!buffer_mapped(bh_result))
                return -EIO;
        return 0;
}

static int gfs2_get_block_direct(struct inode *inode, sector_t lblock,
                                 struct buffer_head *bh_result, int create)
{
        return gfs2_block_map(inode, lblock, bh_result, 0);
}

/**
 * gfs2_writepage_common - Common bits of writepage
 * @page: The page to be written
 * @wbc: The writeback control
 *
 * Returns: 1 if writepage should proceed, zero if the page was dealt
 *          with here without error, or a negative error code.
 */

static int gfs2_writepage_common(struct page *page,
                                 struct writeback_control *wbc)
{
        struct inode *inode = page->mapping->host;
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        loff_t i_size = i_size_read(inode);
        pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
        unsigned offset;

        if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl)))
                goto out;
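        /*
         * current->journal_info is non-NULL while this task has a
         * transaction open; rather than write the page back under an
         * open transaction, redirty it for a later writeback pass.
         */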
        if (current->journal_info)
                goto redirty;
        /* Is the page fully outside i_size? (truncate in progress) */
        offset = i_size & (PAGE_CACHE_SIZE-1);
        if (page->index > end_index || (page->index == end_index && !offset)) {
                page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
                goto out;
        }
        return 1;
redirty:
        redirty_page_for_writepage(wbc, page);
out:
        unlock_page(page);
        return 0;
}

/**
 * gfs2_writepage - Write page for writeback mappings
 * @page: The page
 * @wbc: The writeback control
 *
 * Returns: errno
 */

static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
{
        int ret;

        ret = gfs2_writepage_common(page, wbc);
        if (ret <= 0)
                return ret;

        return nobh_writepage(page, gfs2_get_block_noalloc, wbc);
}

/**
 * __gfs2_jdata_writepage - The core of jdata writepage
 * @page: The page to write
 * @wbc: The writeback control
 *
 * This is shared between writepage and writepages and implements the
 * core of the writepage operation. If a transaction is required then
 * PageChecked will have been set and the transaction will have
 * already been started before this is called.
 */

static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
{
        struct inode *inode = page->mapping->host;
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);

        if (PageChecked(page)) {
                ClearPageChecked(page);
                if (!page_has_buffers(page)) {
                        create_empty_buffers(page, inode->i_sb->s_blocksize,
                                             (1 << BH_Dirty)|(1 << BH_Uptodate));
                }
                gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
        }
        return block_write_full_page(page, gfs2_get_block_noalloc, wbc);
}

/**
 * gfs2_jdata_writepage - Write complete page
 * @page: Page to write
 * @wbc: The writeback control
 *
 * Returns: errno
 */

static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
{
        struct inode *inode = page->mapping->host;
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        int ret;
        int done_trans = 0;

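        /*
         * PageChecked is set by gfs2_set_page_dirty() and means that the
         * page's buffers have not yet been added to the journal, which
         * needs a transaction. For anything but WB_SYNC_ALL writeback,
         * the page is simply redirtied and deferred instead.
         */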
        if (PageChecked(page)) {
                if (wbc->sync_mode != WB_SYNC_ALL)
                        goto out_ignore;
                ret = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
                if (ret)
                        goto out_ignore;
                done_trans = 1;
        }
        ret = gfs2_writepage_common(page, wbc);
        if (ret > 0)
                ret = __gfs2_jdata_writepage(page, wbc);
        if (done_trans)
                gfs2_trans_end(sdp);
        return ret;

out_ignore:
        redirty_page_for_writepage(wbc, page);
        unlock_page(page);
        return 0;
}

/**
 * gfs2_writepages - Write a bunch of dirty pages back to disk
 * @mapping: The mapping to write
 * @wbc: Write-back control
 *
 * Used for both ordered and writeback modes.
 */
static int gfs2_writepages(struct address_space *mapping,
                           struct writeback_control *wbc)
{
        return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
}

/**
 * gfs2_write_jdata_pagevec - Write back a pagevec's worth of pages
 * @mapping: The mapping
 * @wbc: The writeback control
 * @pvec: The vector of pages
 * @nr_pages: The number of pages to write
 * @end: The last page index to consider
 *
 * Returns: non-zero if loop should terminate, zero otherwise
 */

static int gfs2_write_jdata_pagevec(struct address_space *mapping,
                                    struct writeback_control *wbc,
                                    struct pagevec *pvec,
                                    int nr_pages, pgoff_t end)
{
        struct inode *inode = mapping->host;
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        loff_t i_size = i_size_read(inode);
        pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
        unsigned offset = i_size & (PAGE_CACHE_SIZE-1);
        unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize);
        int i;
        int ret;

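        /*
         * Reserve journal space for the worst case: every block covered
         * by the pagevec becoming a journaled data block. The transaction
         * is started before any page is locked to keep the lock ordering
         * right (see gfs2_write_cache_jdata below).
         */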
        ret = gfs2_trans_begin(sdp, nrblocks, nrblocks);
        if (ret < 0)
                return ret;

        for (i = 0; i < nr_pages; i++) {
                struct page *page = pvec->pages[i];

                lock_page(page);

                if (unlikely(page->mapping != mapping)) {
                        unlock_page(page);
                        continue;
                }

                if (!wbc->range_cyclic && page->index > end) {
                        ret = 1;
                        unlock_page(page);
                        continue;
                }

                if (wbc->sync_mode != WB_SYNC_NONE)
                        wait_on_page_writeback(page);

                if (PageWriteback(page) ||
                    !clear_page_dirty_for_io(page)) {
                        unlock_page(page);
                        continue;
                }

                /* Is the page fully outside i_size? (truncate in progress) */
                if (page->index > end_index || (page->index == end_index && !offset)) {
                        page->mapping->a_ops->invalidatepage(page, 0,
                                                             PAGE_CACHE_SIZE);
                        unlock_page(page);
                        continue;
                }

                ret = __gfs2_jdata_writepage(page, wbc);

                if (ret || (--(wbc->nr_to_write) <= 0))
                        ret = 1;
        }
        gfs2_trans_end(sdp);
        return ret;
}

/**
 * gfs2_write_cache_jdata - Like write_cache_pages but different
 * @mapping: The mapping to write
 * @wbc: The writeback control
 *
 * The reason that we use our own function here is that we need to
 * start transactions before we grab page locks. This allows us
 * to get the ordering right.
 */

static int gfs2_write_cache_jdata(struct address_space *mapping,
                                  struct writeback_control *wbc)
{
        int ret = 0;
        int done = 0;
        struct pagevec pvec;
        int nr_pages;
        pgoff_t index;
        pgoff_t end;
        int scanned = 0;
        int range_whole = 0;

        pagevec_init(&pvec, 0);
        if (wbc->range_cyclic) {
                index = mapping->writeback_index; /* Start from prev offset */
                end = -1;
        } else {
                index = wbc->range_start >> PAGE_CACHE_SHIFT;
                end = wbc->range_end >> PAGE_CACHE_SHIFT;
                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
                        range_whole = 1;
                scanned = 1;
        }

retry:
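        /*
         * Gather dirty pages in batches of up to PAGEVEC_SIZE, never
         * looking past "end", and write each batch under its own
         * transaction.
         */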
        while (!done && (index <= end) &&
               (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
                                              PAGECACHE_TAG_DIRTY,
                                              min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
                scanned = 1;
                ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end);
                if (ret)
                        done = 1;
                if (ret > 0)
                        ret = 0;

                pagevec_release(&pvec);
                cond_resched();
        }

        if (!scanned && !done) {
                /*
                 * We hit the last page and there is more work to be done: wrap
                 * back to the start of the file
                 */
                scanned = 1;
                index = 0;
                goto retry;
        }

        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
                mapping->writeback_index = index;
        return ret;
}

/**
 * gfs2_jdata_writepages - Write a bunch of dirty pages back to disk
 * @mapping: The mapping to write
 * @wbc: The writeback control
 */

static int gfs2_jdata_writepages(struct address_space *mapping,
                                 struct writeback_control *wbc)
{
        struct gfs2_inode *ip = GFS2_I(mapping->host);
        struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
        int ret;

        ret = gfs2_write_cache_jdata(mapping, wbc);
        if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) {
                gfs2_log_flush(sdp, ip->i_gl);
                ret = gfs2_write_cache_jdata(mapping, wbc);
        }
        return ret;
}

/**
 * stuffed_readpage - Fill in a Linux page with stuffed file data
 * @ip: the inode
 * @page: the page
 *
 * Returns: errno
 */

static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
{
        struct buffer_head *dibh;
        u64 dsize = i_size_read(&ip->i_inode);
        void *kaddr;
        int error;

        /*
         * Due to the order of unstuffing files and ->fault(), we can be
         * asked for a zero page in the case of a stuffed file being extended,
         * so we need to supply one here. It doesn't happen often.
         */
        if (unlikely(page->index)) {
                zero_user(page, 0, PAGE_CACHE_SIZE);
                SetPageUptodate(page);
                return 0;
        }

        error = gfs2_meta_inode_buffer(ip, &dibh);
        if (error)
                return error;

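        /*
         * A stuffed file's data lives in the dinode block itself,
         * immediately after the header, so copy it out and zero the
         * remainder of the page.
         */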
        kaddr = kmap_atomic(page);
        if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode)))
                dsize = (dibh->b_size - sizeof(struct gfs2_dinode));
        memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
        memset(kaddr + dsize, 0, PAGE_CACHE_SIZE - dsize);
        kunmap_atomic(kaddr);
        flush_dcache_page(page);
        brelse(dibh);
        SetPageUptodate(page);

        return 0;
}

/**
 * __gfs2_readpage - readpage
 * @file: The file to read a page for
 * @page: The page to read
 *
 * This is the core of gfs2's readpage. It's used by the internal file
 * reading code, as in that case we already hold the glock. It's also
 * called by gfs2_readpage() once the required lock has been granted.
 */

static int __gfs2_readpage(void *file, struct page *page)
{
        struct gfs2_inode *ip = GFS2_I(page->mapping->host);
        struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
        int error;

        if (gfs2_is_stuffed(ip)) {
                error = stuffed_readpage(ip, page);
                unlock_page(page);
        } else {
                error = mpage_readpage(page, gfs2_block_map);
        }

        if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
                return -EIO;

        return error;
}

/**
 * gfs2_readpage - read a page of a file
 * @file: The file to read
 * @page: The page of the file
 *
 * This deals with the locking required. We have to unlock and
 * relock the page in order to get the locking in the right
 * order.
 */

static int gfs2_readpage(struct file *file, struct page *page)
{
        struct address_space *mapping = page->mapping;
        struct gfs2_inode *ip = GFS2_I(mapping->host);
        struct gfs2_holder gh;
        int error;

        unlock_page(page);
        gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
        error = gfs2_glock_nq(&gh);
        if (unlikely(error))
                goto out;
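        /*
         * The page was unlocked while we waited for the glock, so it may
         * have been truncated or migrated in the meantime; in that case
         * return AOP_TRUNCATED_PAGE so that the caller retries.
         */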
        error = AOP_TRUNCATED_PAGE;
        lock_page(page);
        if (page->mapping == mapping && !PageUptodate(page))
                error = __gfs2_readpage(file, page);
        else
                unlock_page(page);
        gfs2_glock_dq(&gh);
out:
        gfs2_holder_uninit(&gh);
        if (error && error != AOP_TRUNCATED_PAGE)
                lock_page(page);
        return error;
}

/**
 * gfs2_internal_read - read an internal file
 * @ip: The gfs2 inode
 * @buf: The buffer to fill
 * @pos: The file position
 * @size: The amount to read
 *
 * Returns: The amount of data actually copied or the error code.
 */

int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos,
                       unsigned size)
{
        struct address_space *mapping = ip->i_inode.i_mapping;
        unsigned long index = *pos / PAGE_CACHE_SIZE;
        unsigned offset = *pos & (PAGE_CACHE_SIZE - 1);
        unsigned copied = 0;
        unsigned amt;
        struct page *page;
        void *p;

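        /*
         * Copy out one page at a time. __gfs2_readpage is used directly
         * here since callers of gfs2_internal_read already hold the glock.
         */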
        do {
                amt = size - copied;
                if (offset + size > PAGE_CACHE_SIZE)
                        amt = PAGE_CACHE_SIZE - offset;
                page = read_cache_page(mapping, index, __gfs2_readpage, NULL);
                if (IS_ERR(page))
                        return PTR_ERR(page);
                p = kmap_atomic(page);
                memcpy(buf + copied, p + offset, amt);
                kunmap_atomic(p);
                mark_page_accessed(page);
                page_cache_release(page);
                copied += amt;
                index++;
                offset = 0;
        } while (copied < size);
        (*pos) += size;
        return size;
}

/**
 * gfs2_readpages - Read a bunch of pages at once
 *
 * Some notes:
 * 1. This is only for readahead, so we can simply ignore any things
 *    which are slightly inconvenient (such as locking conflicts between
 *    the page lock and the glock) and return having done no I/O. It's
 *    obviously not something we'd want to do on too regular a basis.
 *    Any I/O we ignore at this time will be done via readpage later.
 * 2. We don't handle stuffed files here; we let readpage do the honours.
 * 3. mpage_readpages() does most of the heavy lifting in the common case.
 * 4. gfs2_block_map() is relied upon to set BH_Boundary in the right places.
 */

static int gfs2_readpages(struct file *file, struct address_space *mapping,
                          struct list_head *pages, unsigned nr_pages)
{
        struct inode *inode = mapping->host;
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct gfs2_holder gh;
        int ret;

        gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
        ret = gfs2_glock_nq(&gh);
        if (unlikely(ret))
                goto out_uninit;
        if (!gfs2_is_stuffed(ip))
                ret = mpage_readpages(mapping, pages, nr_pages, gfs2_block_map);
        gfs2_glock_dq(&gh);
out_uninit:
        gfs2_holder_uninit(&gh);
        if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
                ret = -EIO;
        return ret;
}

/**
 * gfs2_write_begin - Begin to write to a file
 * @file: The file to write to
 * @mapping: The mapping in which to write
 * @pos: The file offset at which to start writing
 * @len: Length of the write
 * @flags: Various flags
 * @pagep: Pointer to return the page
 * @fsdata: Pointer to return fs data (unused by GFS2)
 *
 * Returns: errno
 */

static int gfs2_write_begin(struct file *file, struct address_space *mapping,
                            loff_t pos, unsigned len, unsigned flags,
                            struct page **pagep, void **fsdata)
{
        struct gfs2_inode *ip = GFS2_I(mapping->host);
        struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
        struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
        unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
        unsigned requested = 0;
        int alloc_required;
        int error = 0;
        pgoff_t index = pos >> PAGE_CACHE_SHIFT;
        unsigned from = pos & (PAGE_CACHE_SIZE - 1);
        struct page *page;

        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
        error = gfs2_glock_nq(&ip->i_gh);
        if (unlikely(error))
                goto out_uninit;
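        /*
         * A write to the rindex (i.e. gfs2_grow) will also update the
         * statfs file, so its glock must be held too.
         */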
        if (&ip->i_inode == sdp->sd_rindex) {
                error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
                                           GL_NOCACHE, &m_ip->i_gh);
                if (unlikely(error)) {
                        gfs2_glock_dq(&ip->i_gh);
                        goto out_uninit;
                }
        }

        alloc_required = gfs2_write_alloc_required(ip, pos, len);

        if (alloc_required || gfs2_is_jdata(ip))
                gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);

        if (alloc_required) {
                struct gfs2_alloc_parms ap = { .aflags = 0, };
                error = gfs2_quota_lock_check(ip);
                if (error)
                        goto out_unlock;

                requested = data_blocks + ind_blocks;
                ap.target = requested;
                error = gfs2_inplace_reserve(ip, &ap);
                if (error)
                        goto out_qunlock;
        }

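        /*
         * Work out the worst-case number of blocks this write can add to
         * the journal: the dinode, new indirect blocks, the data itself
         * if journaled, plus statfs, quota and resource group changes.
         */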
        rblocks = RES_DINODE + ind_blocks;
        if (gfs2_is_jdata(ip))
                rblocks += data_blocks ? data_blocks : 1;
        if (ind_blocks || data_blocks)
                rblocks += RES_STATFS + RES_QUOTA;
        if (&ip->i_inode == sdp->sd_rindex)
                rblocks += 2 * RES_STATFS;
        if (alloc_required)
                rblocks += gfs2_rg_blocks(ip, requested);

        error = gfs2_trans_begin(sdp, rblocks,
                                 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
        if (error)
                goto out_trans_fail;

        error = -ENOMEM;
        flags |= AOP_FLAG_NOFS;
        page = grab_cache_page_write_begin(mapping, index, flags);
        *pagep = page;
        if (unlikely(!page))
                goto out_endtrans;

        if (gfs2_is_stuffed(ip)) {
                error = 0;
                if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
                        error = gfs2_unstuff_dinode(ip, page);
                        if (error == 0)
                                goto prepare_write;
                } else if (!PageUptodate(page)) {
                        error = stuffed_readpage(ip, page);
                }
                goto out;
        }

prepare_write:
        error = __block_write_begin(page, from, len, gfs2_block_map);
out:
        if (error == 0)
                return 0;

        unlock_page(page);
        page_cache_release(page);

        gfs2_trans_end(sdp);
        if (pos + len > ip->i_inode.i_size)
                gfs2_trim_blocks(&ip->i_inode);
        goto out_trans_fail;

out_endtrans:
        gfs2_trans_end(sdp);
out_trans_fail:
        if (alloc_required) {
                gfs2_inplace_release(ip);
out_qunlock:
                gfs2_quota_unlock(ip);
        }
out_unlock:
        if (&ip->i_inode == sdp->sd_rindex) {
                gfs2_glock_dq(&m_ip->i_gh);
                gfs2_holder_uninit(&m_ip->i_gh);
        }
        gfs2_glock_dq(&ip->i_gh);
out_uninit:
        gfs2_holder_uninit(&ip->i_gh);
        return error;
}

/**
 * adjust_fs_space - Adjusts the free space available due to gfs2_grow
 * @inode: the rindex inode
 */
static void adjust_fs_space(struct inode *inode)
{
        struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
        struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
        struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
        struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
        struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
        struct buffer_head *m_bh, *l_bh;
        u64 fs_total, new_free;

        /* Total up the file system space, according to the latest rindex. */
        fs_total = gfs2_ri_total(sdp);
        if (gfs2_meta_inode_buffer(m_ip, &m_bh) != 0)
                return;

        spin_lock(&sdp->sd_statfs_spin);
        gfs2_statfs_change_in(m_sc, m_bh->b_data +
                              sizeof(struct gfs2_dinode));
        if (fs_total > (m_sc->sc_total + l_sc->sc_total))
                new_free = fs_total - (m_sc->sc_total + l_sc->sc_total);
        else
                new_free = 0;
        spin_unlock(&sdp->sd_statfs_spin);
        fs_warn(sdp, "File system extended by %llu blocks.\n",
                (unsigned long long)new_free);
        gfs2_statfs_change(sdp, new_free, new_free, 0);

        if (gfs2_meta_inode_buffer(l_ip, &l_bh) != 0)
                goto out;
        update_statfs(sdp, m_bh, l_bh);
        brelse(l_bh);
out:
        brelse(m_bh);
}

/**
 * gfs2_stuffed_write_end - Write end for stuffed files
 * @inode: The inode
 * @dibh: The buffer_head containing the on-disk inode
 * @pos: The file position
 * @len: The length of the write
 * @copied: How much was actually copied by the VFS
 * @page: The page
 *
 * This copies the data from the page into the inode block after
 * the inode data structure itself.
 *
 * Returns: errno
 */
static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
                                  loff_t pos, unsigned len, unsigned copied,
                                  struct page *page)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
        u64 to = pos + copied;
        void *kaddr;
        unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);

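        /*
         * The write must fit in the dinode block after the header;
         * gfs2_write_begin() unstuffs the inode before it can grow any
         * larger than that.
         */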
        BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode)));
        kaddr = kmap_atomic(page);
        memcpy(buf + pos, kaddr + pos, copied);
        memset(kaddr + pos + copied, 0, len - copied);
        flush_dcache_page(page);
        kunmap_atomic(kaddr);

        if (!PageUptodate(page))
                SetPageUptodate(page);
        unlock_page(page);
        page_cache_release(page);

        if (copied) {
                if (inode->i_size < to)
                        i_size_write(inode, to);
                mark_inode_dirty(inode);
        }

        if (inode == sdp->sd_rindex) {
                adjust_fs_space(inode);
                sdp->sd_rindex_uptodate = 0;
        }

        brelse(dibh);
        gfs2_trans_end(sdp);
        if (inode == sdp->sd_rindex) {
                gfs2_glock_dq(&m_ip->i_gh);
                gfs2_holder_uninit(&m_ip->i_gh);
        }
        gfs2_glock_dq(&ip->i_gh);
        gfs2_holder_uninit(&ip->i_gh);
        return copied;
}

/**
 * gfs2_write_end
 * @file: The file to write to
 * @mapping: The address space to write to
 * @pos: The file position
 * @len: The length of the data
 * @copied: How much was actually copied by the VFS
 * @page: The page that has been written
 * @fsdata: The fsdata (unused in GFS2)
 *
 * The main write_end function for GFS2. We have a separate one for
 * stuffed files as they are slightly different, otherwise we just
 * put our locking around the VFS provided functions.
 *
 * Returns: errno
 */

static int gfs2_write_end(struct file *file, struct address_space *mapping,
                          loff_t pos, unsigned len, unsigned copied,
                          struct page *page, void *fsdata)
{
        struct inode *inode = page->mapping->host;
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
        struct buffer_head *dibh;
        unsigned int from = pos & (PAGE_CACHE_SIZE - 1);
        unsigned int to = from + len;
        int ret;
        struct gfs2_trans *tr = current->journal_info;
        BUG_ON(!tr);

        BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL);

        ret = gfs2_meta_inode_buffer(ip, &dibh);
        if (unlikely(ret)) {
                unlock_page(page);
                page_cache_release(page);
                goto failed;
        }

        if (gfs2_is_stuffed(ip))
                return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page);

        if (!gfs2_is_writeback(ip))
                gfs2_page_add_databufs(ip, page, from, to);

        ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
        if (tr->tr_num_buf_new)
                __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
        else
                gfs2_trans_add_meta(ip->i_gl, dibh);

        if (inode == sdp->sd_rindex) {
                adjust_fs_space(inode);
                sdp->sd_rindex_uptodate = 0;
        }

        brelse(dibh);
failed:
        gfs2_trans_end(sdp);
        gfs2_inplace_release(ip);
        if (ip->i_res->rs_qa_qd_num)
                gfs2_quota_unlock(ip);
        if (inode == sdp->sd_rindex) {
                gfs2_glock_dq(&m_ip->i_gh);
                gfs2_holder_uninit(&m_ip->i_gh);
        }
        gfs2_glock_dq(&ip->i_gh);
        gfs2_holder_uninit(&ip->i_gh);
        return ret;
}

/**
 * gfs2_set_page_dirty - Page dirtying function
 * @page: The page to dirty
 *
 * Returns: 1 if it dirtied the page, or 0 otherwise
 */

static int gfs2_set_page_dirty(struct page *page)
{
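        /*
         * PageChecked tells the jdata writeback paths that this page's
         * buffers still need to be added to the journal.
         */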
        SetPageChecked(page);
        return __set_page_dirty_buffers(page);
}

/**
 * gfs2_bmap - Block map function
 * @mapping: Address space info
 * @lblock: The block to map
 *
 * Returns: The disk address for the block or 0 on hole or error
 */

static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
{
        struct gfs2_inode *ip = GFS2_I(mapping->host);
        struct gfs2_holder i_gh;
        sector_t dblock = 0;
        int error;

        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
        if (error)
                return 0;

        if (!gfs2_is_stuffed(ip))
                dblock = generic_block_bmap(mapping, lblock, gfs2_block_map);

        gfs2_glock_dq_uninit(&i_gh);

        return dblock;
}

static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
        struct gfs2_bufdata *bd;

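        /*
         * Detach this buffer from the journal (its bufdata list or the
         * active items list) and clear its state, since the disk block
         * it maps is being invalidated.
         */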
        lock_buffer(bh);
        gfs2_log_lock(sdp);
        clear_buffer_dirty(bh);
        bd = bh->b_private;
        if (bd) {
                if (!list_empty(&bd->bd_list) && !buffer_pinned(bh))
                        list_del_init(&bd->bd_list);
                else
                        gfs2_remove_from_journal(bh, current->journal_info, 0);
        }
        bh->b_bdev = NULL;
        clear_buffer_mapped(bh);
        clear_buffer_req(bh);
        clear_buffer_new(bh);
        gfs2_log_unlock(sdp);
        unlock_buffer(bh);
}

static void gfs2_invalidatepage(struct page *page, unsigned int offset,
                                unsigned int length)
{
        struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
        unsigned int stop = offset + length;
        int partial_page = (offset || length < PAGE_CACHE_SIZE);
        struct buffer_head *bh, *head;
        unsigned long pos = 0;

        BUG_ON(!PageLocked(page));
        if (!partial_page)
                ClearPageChecked(page);
        if (!page_has_buffers(page))
                goto out;

        bh = head = page_buffers(page);
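        /* Only discard buffers that lie entirely within [offset, stop). */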
        do {
                if (pos + bh->b_size > stop)
                        return;

                if (offset <= pos)
                        gfs2_discard(sdp, bh);
                pos += bh->b_size;
                bh = bh->b_this_page;
        } while (bh != head);
out:
        if (!partial_page)
                try_to_release_page(page, 0);
}

/**
 * gfs2_ok_for_dio - check that dio is valid on this file
 * @ip: The inode
 * @rw: READ or WRITE
 * @offset: The offset at which we are reading or writing
 *
 * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
 *          1 (to accept the i/o request)
 */
static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
{
        /*
         * Should we return an error here? I can't see that O_DIRECT for
         * a stuffed file makes any sense. For now we'll silently fall
         * back to buffered I/O
         */
        if (gfs2_is_stuffed(ip))
                return 0;

        if (offset >= i_size_read(&ip->i_inode))
                return 0;
        return 1;
}

static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
                              const struct iovec *iov, loff_t offset,
                              unsigned long nr_segs)
{
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
        struct address_space *mapping = inode->i_mapping;
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_holder gh;
        int rv;

        /*
         * Deferred lock, even if it's a write, since we do no allocation
         * on this path. All we need change is atime, and this lock mode
         * ensures that other nodes have flushed their buffered read caches
         * (i.e. their page cache entries for this inode). We do not,
         * unfortunately, have the option of only flushing a range like
         * the VFS does.
         */
        gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
        rv = gfs2_glock_nq(&gh);
        if (rv)
                return rv;
        rv = gfs2_ok_for_dio(ip, rw, offset);
        if (rv != 1)
                goto out; /* dio not valid, fall back to buffered i/o */

        /*
         * Now since we are holding a deferred (CW) lock at this point, you
         * might be wondering why this is ever needed. There is a case however
         * where we've granted a deferred local lock against a cached exclusive
         * glock. That is ok provided all granted local locks are deferred, but
         * it also means that it is possible to encounter pages which are
         * cached and possibly also mapped. So here we check for that and sort
         * them out ahead of the dio. The glock state machine will take care of
         * everything else.
         *
         * If in fact the cached glock state (gl->gl_state) is deferred (CW) in
         * the first place, mapping->nrpages will always be zero.
         */
        if (mapping->nrpages) {
                loff_t lstart = offset & (PAGE_CACHE_SIZE - 1);
                loff_t len = iov_length(iov, nr_segs);
                loff_t end = PAGE_ALIGN(offset + len) - 1;

                rv = 0;
                if (len == 0)
                        goto out;
                if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
                        unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len);
                rv = filemap_write_and_wait_range(mapping, lstart, end);
                if (rv)
                        return rv;
                truncate_inode_pages_range(mapping, lstart, end);
        }

        rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
                                  offset, nr_segs, gfs2_get_block_direct,
                                  NULL, NULL, 0);
out:
        gfs2_glock_dq(&gh);
        gfs2_holder_uninit(&gh);
        return rv;
}

/**
 * gfs2_releasepage - free the metadata associated with a page
 * @page: the page that's being released
 * @gfp_mask: passed from Linux VFS, ignored by us
 *
 * Call try_to_free_buffers() if the buffers in this page can be
 * released.
 *
 * Returns: 0 if the buffers cannot be released, otherwise the
 *          result of try_to_free_buffers()
 */

int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
{
        struct address_space *mapping = page->mapping;
        struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
        struct buffer_head *bh, *head;
        struct gfs2_bufdata *bd;

        if (!page_has_buffers(page))
                return 0;

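        /*
         * First pass: with the log and AIL locks held, check that none of
         * the page's buffers are still in use by the journal.
         */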
        gfs2_log_lock(sdp);
        spin_lock(&sdp->sd_ail_lock);
        head = bh = page_buffers(page);
        do {
                if (atomic_read(&bh->b_count))
                        goto cannot_release;
                bd = bh->b_private;
                if (bd && bd->bd_tr)
                        goto cannot_release;
                if (buffer_pinned(bh) || buffer_dirty(bh))
                        goto not_possible;
                bh = bh->b_this_page;
        } while (bh != head);
        spin_unlock(&sdp->sd_ail_lock);
        gfs2_log_unlock(sdp);

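        /*
         * Second pass: detach each buffer's bufdata and free it, except
         * that a pinned buffer's bufdata is left for the log to release.
         */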
        head = bh = page_buffers(page);
        do {
                gfs2_log_lock(sdp);
                bd = bh->b_private;
                if (bd) {
                        gfs2_assert_warn(sdp, bd->bd_bh == bh);
                        if (!list_empty(&bd->bd_list)) {
                                if (!buffer_pinned(bh))
                                        list_del_init(&bd->bd_list);
                                else
                                        bd = NULL;
                        }
                        if (bd)
                                bd->bd_bh = NULL;
                        bh->b_private = NULL;
                }
                gfs2_log_unlock(sdp);
                if (bd)
                        kmem_cache_free(gfs2_bufdata_cachep, bd);

                bh = bh->b_this_page;
        } while (bh != head);

        return try_to_free_buffers(page);

not_possible: /* Should never happen */
        WARN_ON(buffer_dirty(bh));
        WARN_ON(buffer_pinned(bh));
cannot_release:
        spin_unlock(&sdp->sd_ail_lock);
        gfs2_log_unlock(sdp);
        return 0;
}

static const struct address_space_operations gfs2_writeback_aops = {
        .writepage = gfs2_writepage,
        .writepages = gfs2_writepages,
        .readpage = gfs2_readpage,
        .readpages = gfs2_readpages,
        .write_begin = gfs2_write_begin,
        .write_end = gfs2_write_end,
        .bmap = gfs2_bmap,
        .invalidatepage = gfs2_invalidatepage,
        .releasepage = gfs2_releasepage,
        .direct_IO = gfs2_direct_IO,
        .migratepage = buffer_migrate_page,
        .is_partially_uptodate = block_is_partially_uptodate,
        .error_remove_page = generic_error_remove_page,
};

static const struct address_space_operations gfs2_ordered_aops = {
        .writepage = gfs2_writepage,
        .writepages = gfs2_writepages,
        .readpage = gfs2_readpage,
        .readpages = gfs2_readpages,
        .write_begin = gfs2_write_begin,
        .write_end = gfs2_write_end,
        .set_page_dirty = gfs2_set_page_dirty,
        .bmap = gfs2_bmap,
        .invalidatepage = gfs2_invalidatepage,
        .releasepage = gfs2_releasepage,
        .direct_IO = gfs2_direct_IO,
        .migratepage = buffer_migrate_page,
        .is_partially_uptodate = block_is_partially_uptodate,
        .error_remove_page = generic_error_remove_page,
};

static const struct address_space_operations gfs2_jdata_aops = {
        .writepage = gfs2_jdata_writepage,
        .writepages = gfs2_jdata_writepages,
        .readpage = gfs2_readpage,
        .readpages = gfs2_readpages,
        .write_begin = gfs2_write_begin,
        .write_end = gfs2_write_end,
        .set_page_dirty = gfs2_set_page_dirty,
        .bmap = gfs2_bmap,
        .invalidatepage = gfs2_invalidatepage,
        .releasepage = gfs2_releasepage,
        .is_partially_uptodate = block_is_partially_uptodate,
        .error_remove_page = generic_error_remove_page,
};

void gfs2_set_aops(struct inode *inode)
{
        struct gfs2_inode *ip = GFS2_I(inode);

        if (gfs2_is_writeback(ip))
                inode->i_mapping->a_ops = &gfs2_writeback_aops;
        else if (gfs2_is_ordered(ip))
                inode->i_mapping->a_ops = &gfs2_ordered_aops;
        else if (gfs2_is_jdata(ip))
                inode->i_mapping->a_ops = &gfs2_jdata_aops;
        else
                BUG();
}