linux/fs/gfs2/aops.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   4 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
   5 */
   6
   7#include <linux/sched.h>
   8#include <linux/slab.h>
   9#include <linux/spinlock.h>
  10#include <linux/completion.h>
  11#include <linux/buffer_head.h>
  12#include <linux/pagemap.h>
  13#include <linux/pagevec.h>
  14#include <linux/mpage.h>
  15#include <linux/fs.h>
  16#include <linux/writeback.h>
  17#include <linux/swap.h>
  18#include <linux/gfs2_ondisk.h>
  19#include <linux/backing-dev.h>
  20#include <linux/uio.h>
  21#include <trace/events/writeback.h>
  22#include <linux/sched/signal.h>
  23
  24#include "gfs2.h"
  25#include "incore.h"
  26#include "bmap.h"
  27#include "glock.h"
  28#include "inode.h"
  29#include "log.h"
  30#include "meta_io.h"
  31#include "quota.h"
  32#include "trans.h"
  33#include "rgrp.h"
  34#include "super.h"
  35#include "util.h"
  36#include "glops.h"
  37#include "aops.h"
  38
  39
  40void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
  41                            unsigned int from, unsigned int len)
  42{
  43        struct buffer_head *head = page_buffers(page);
  44        unsigned int bsize = head->b_size;
  45        struct buffer_head *bh;
  46        unsigned int to = from + len;
  47        unsigned int start, end;
  48
  49        for (bh = head, start = 0; bh != head || !start;
  50             bh = bh->b_this_page, start = end) {
  51                end = start + bsize;
  52                if (end <= from)
  53                        continue;
  54                if (start >= to)
  55                        break;
  56                set_buffer_uptodate(bh);
  57                gfs2_trans_add_data(ip->i_gl, bh);
  58        }
  59}
  60
  61/**
  62 * gfs2_get_block_noalloc - Fills in a buffer head with details about a block
  63 * @inode: The inode
  64 * @lblock: The block number to look up
  65 * @bh_result: The buffer head to return the result in
  66 * @create: Non-zero if we may add block to the file
  67 *
  68 * Returns: errno
  69 */
  70
  71static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
  72                                  struct buffer_head *bh_result, int create)
  73{
  74        int error;
  75
  76        error = gfs2_block_map(inode, lblock, bh_result, 0);
  77        if (error)
  78                return error;
  79        if (!buffer_mapped(bh_result))
  80                return -EIO;
  81        return 0;
  82}
  83
  84/**
  85 * gfs2_writepage_common - Common bits of writepage
  86 * @page: The page to be written
  87 * @wbc: The writeback control
  88 *
  89 * Returns: 1 if writepage is ok, otherwise an error code or zero if no error.
  90 */
  91
  92static int gfs2_writepage_common(struct page *page,
  93                                 struct writeback_control *wbc)
  94{
  95        struct inode *inode = page->mapping->host;
  96        struct gfs2_inode *ip = GFS2_I(inode);
  97        struct gfs2_sbd *sdp = GFS2_SB(inode);
  98        loff_t i_size = i_size_read(inode);
  99        pgoff_t end_index = i_size >> PAGE_SHIFT;
 100        unsigned offset;
 101
 102        if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl)))
 103                goto out;
 104        if (current->journal_info)
 105                goto redirty;
 106        /* Is the page fully outside i_size? (truncate in progress) */
 107        offset = i_size & (PAGE_SIZE-1);
 108        if (page->index > end_index || (page->index == end_index && !offset)) {
 109                page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
 110                goto out;
 111        }
 112        return 1;
 113redirty:
 114        redirty_page_for_writepage(wbc, page);
 115out:
 116        unlock_page(page);
 117        return 0;
 118}
 119
 120/**
 121 * gfs2_writepage - Write page for writeback mappings
 122 * @page: The page
 123 * @wbc: The writeback control
 124 *
 125 */
 126
 127static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
 128{
 129        int ret;
 130
 131        ret = gfs2_writepage_common(page, wbc);
 132        if (ret <= 0)
 133                return ret;
 134
 135        return nobh_writepage(page, gfs2_get_block_noalloc, wbc);
 136}
 137
 138/* This is the same as calling block_write_full_page, but it also
 139 * writes pages outside of i_size
 140 */
 141static int gfs2_write_full_page(struct page *page, get_block_t *get_block,
 142                                struct writeback_control *wbc)
 143{
 144        struct inode * const inode = page->mapping->host;
 145        loff_t i_size = i_size_read(inode);
 146        const pgoff_t end_index = i_size >> PAGE_SHIFT;
 147        unsigned offset;
 148
 149        /*
 150         * The page straddles i_size.  It must be zeroed out on each and every
 151         * writepage invocation because it may be mmapped.  "A file is mapped
 152         * in multiples of the page size.  For a file that is not a multiple of
 153         * the  page size, the remaining memory is zeroed when mapped, and
 154         * writes to that region are not written out to the file."
 155         */
 156        offset = i_size & (PAGE_SIZE-1);
 157        if (page->index == end_index && offset)
 158                zero_user_segment(page, offset, PAGE_SIZE);
 159
 160        return __block_write_full_page(inode, page, get_block, wbc,
 161                                       end_buffer_async_write);
 162}
 163
 164/**
 165 * __gfs2_jdata_writepage - The core of jdata writepage
 166 * @page: The page to write
 167 * @wbc: The writeback control
 168 *
 169 * This is shared between writepage and writepages and implements the
 170 * core of the writepage operation. If a transaction is required then
 171 * PageChecked will have been set and the transaction will have
 172 * already been started before this is called.
 173 */
 174
 175static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
 176{
 177        struct inode *inode = page->mapping->host;
 178        struct gfs2_inode *ip = GFS2_I(inode);
 179        struct gfs2_sbd *sdp = GFS2_SB(inode);
 180
 181        if (PageChecked(page)) {
 182                ClearPageChecked(page);
 183                if (!page_has_buffers(page)) {
 184                        create_empty_buffers(page, inode->i_sb->s_blocksize,
 185                                             BIT(BH_Dirty)|BIT(BH_Uptodate));
 186                }
 187                gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize);
 188        }
 189        return gfs2_write_full_page(page, gfs2_get_block_noalloc, wbc);
 190}
 191
 192/**
 193 * gfs2_jdata_writepage - Write complete page
 194 * @page: Page to write
 195 * @wbc: The writeback control
 196 *
 197 * Returns: errno
 198 *
 199 */
 200
 201static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
 202{
 203        struct inode *inode = page->mapping->host;
 204        struct gfs2_inode *ip = GFS2_I(inode);
 205        struct gfs2_sbd *sdp = GFS2_SB(inode);
 206        int ret;
 207
 208        if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl)))
 209                goto out;
 210        if (PageChecked(page) || current->journal_info)
 211                goto out_ignore;
 212        ret = __gfs2_jdata_writepage(page, wbc);
 213        return ret;
 214
 215out_ignore:
 216        redirty_page_for_writepage(wbc, page);
 217out:
 218        unlock_page(page);
 219        return 0;
 220}
 221
 222/**
 223 * gfs2_writepages - Write a bunch of dirty pages back to disk
 224 * @mapping: The mapping to write
 225 * @wbc: Write-back control
 226 *
 227 * Used for both ordered and writeback modes.
 228 */
 229static int gfs2_writepages(struct address_space *mapping,
 230                           struct writeback_control *wbc)
 231{
 232        struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
 233        int ret = mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
 234
 235        /*
 236         * Even if we didn't write any pages here, we might still be holding
 237         * dirty pages in the ail. We forcibly flush the ail because we don't
 238         * want balance_dirty_pages() to loop indefinitely trying to write out
 239         * pages held in the ail that it can't find.
 240         */
 241        if (ret == 0)
 242                set_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags);
 243
 244        return ret;
 245}
 246
 247/**
 248 * gfs2_write_jdata_pagevec - Write back a pagevec's worth of pages
 249 * @mapping: The mapping
 250 * @wbc: The writeback control
 251 * @pvec: The vector of pages
 252 * @nr_pages: The number of pages to write
 253 * @done_index: Page index
 254 *
 255 * Returns: non-zero if loop should terminate, zero otherwise
 256 */
 257
 258static int gfs2_write_jdata_pagevec(struct address_space *mapping,
 259                                    struct writeback_control *wbc,
 260                                    struct pagevec *pvec,
 261                                    int nr_pages,
 262                                    pgoff_t *done_index)
 263{
 264        struct inode *inode = mapping->host;
 265        struct gfs2_sbd *sdp = GFS2_SB(inode);
 266        unsigned nrblocks = nr_pages * (PAGE_SIZE/inode->i_sb->s_blocksize);
 267        int i;
 268        int ret;
 269
 270        ret = gfs2_trans_begin(sdp, nrblocks, nrblocks);
 271        if (ret < 0)
 272                return ret;
 273
 274        for(i = 0; i < nr_pages; i++) {
 275                struct page *page = pvec->pages[i];
 276
 277                *done_index = page->index;
 278
 279                lock_page(page);
 280
 281                if (unlikely(page->mapping != mapping)) {
 282continue_unlock:
 283                        unlock_page(page);
 284                        continue;
 285                }
 286
 287                if (!PageDirty(page)) {
 288                        /* someone wrote it for us */
 289                        goto continue_unlock;
 290                }
 291
 292                if (PageWriteback(page)) {
 293                        if (wbc->sync_mode != WB_SYNC_NONE)
 294                                wait_on_page_writeback(page);
 295                        else
 296                                goto continue_unlock;
 297                }
 298
 299                BUG_ON(PageWriteback(page));
 300                if (!clear_page_dirty_for_io(page))
 301                        goto continue_unlock;
 302
 303                trace_wbc_writepage(wbc, inode_to_bdi(inode));
 304
 305                ret = __gfs2_jdata_writepage(page, wbc);
 306                if (unlikely(ret)) {
 307                        if (ret == AOP_WRITEPAGE_ACTIVATE) {
 308                                unlock_page(page);
 309                                ret = 0;
 310                        } else {
 311
 312                                /*
 313                                 * done_index is set past this page,
 314                                 * so media errors will not choke
 315                                 * background writeout for the entire
 316                                 * file. This has consequences for
 317                                 * range_cyclic semantics (ie. it may
 318                                 * not be suitable for data integrity
 319                                 * writeout).
 320                                 */
 321                                *done_index = page->index + 1;
 322                                ret = 1;
 323                                break;
 324                        }
 325                }
 326
 327                /*
 328                 * We stop writing back only if we are not doing
 329                 * integrity sync. In case of integrity sync we have to
 330                 * keep going until we have written all the pages
 331                 * we tagged for writeback prior to entering this loop.
 332                 */
 333                if (--wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) {
 334                        ret = 1;
 335                        break;
 336                }
 337
 338        }
 339        gfs2_trans_end(sdp);
 340        return ret;
 341}
 342
 343/**
 344 * gfs2_write_cache_jdata - Like write_cache_pages but different
 345 * @mapping: The mapping to write
 346 * @wbc: The writeback control
 347 *
 348 * The reason that we use our own function here is that we need to
 349 * start transactions before we grab page locks. This allows us
 350 * to get the ordering right.
 351 */
 352
 353static int gfs2_write_cache_jdata(struct address_space *mapping,
 354                                  struct writeback_control *wbc)
 355{
 356        int ret = 0;
 357        int done = 0;
 358        struct pagevec pvec;
 359        int nr_pages;
 360        pgoff_t uninitialized_var(writeback_index);
 361        pgoff_t index;
 362        pgoff_t end;
 363        pgoff_t done_index;
 364        int cycled;
 365        int range_whole = 0;
 366        xa_mark_t tag;
 367
 368        pagevec_init(&pvec);
 369        if (wbc->range_cyclic) {
 370                writeback_index = mapping->writeback_index; /* prev offset */
 371                index = writeback_index;
 372                if (index == 0)
 373                        cycled = 1;
 374                else
 375                        cycled = 0;
 376                end = -1;
 377        } else {
 378                index = wbc->range_start >> PAGE_SHIFT;
 379                end = wbc->range_end >> PAGE_SHIFT;
 380                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
 381                        range_whole = 1;
 382                cycled = 1; /* ignore range_cyclic tests */
 383        }
 384        if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
 385                tag = PAGECACHE_TAG_TOWRITE;
 386        else
 387                tag = PAGECACHE_TAG_DIRTY;
 388
 389retry:
 390        if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
 391                tag_pages_for_writeback(mapping, index, end);
 392        done_index = index;
 393        while (!done && (index <= end)) {
 394                nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
 395                                tag);
 396                if (nr_pages == 0)
 397                        break;
 398
 399                ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, &done_index);
 400                if (ret)
 401                        done = 1;
 402                if (ret > 0)
 403                        ret = 0;
 404                pagevec_release(&pvec);
 405                cond_resched();
 406        }
 407
 408        if (!cycled && !done) {
 409                /*
 410                 * range_cyclic:
 411                 * We hit the last page and there is more work to be done: wrap
 412                 * back to the start of the file
 413                 */
 414                cycled = 1;
 415                index = 0;
 416                end = writeback_index - 1;
 417                goto retry;
 418        }
 419
 420        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
 421                mapping->writeback_index = done_index;
 422
 423        return ret;
 424}
 425
 426
 427/**
 428 * gfs2_jdata_writepages - Write a bunch of dirty pages back to disk
 429 * @mapping: The mapping to write
 430 * @wbc: The writeback control
 431 * 
 432 */
 433
 434static int gfs2_jdata_writepages(struct address_space *mapping,
 435                                 struct writeback_control *wbc)
 436{
 437        struct gfs2_inode *ip = GFS2_I(mapping->host);
 438        struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
 439        int ret;
 440
 441        ret = gfs2_write_cache_jdata(mapping, wbc);
 442        if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) {
 443                gfs2_log_flush(sdp, ip->i_gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
 444                               GFS2_LFC_JDATA_WPAGES);
 445                ret = gfs2_write_cache_jdata(mapping, wbc);
 446        }
 447        return ret;
 448}
 449
 450/**
 451 * stuffed_readpage - Fill in a Linux page with stuffed file data
 452 * @ip: the inode
 453 * @page: the page
 454 *
 455 * Returns: errno
 456 */
 457
 458int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
 459{
 460        struct buffer_head *dibh;
 461        u64 dsize = i_size_read(&ip->i_inode);
 462        void *kaddr;
 463        int error;
 464
 465        /*
 466         * Due to the order of unstuffing files and ->fault(), we can be
 467         * asked for a zero page in the case of a stuffed file being extended,
 468         * so we need to supply one here. It doesn't happen often.
 469         */
 470        if (unlikely(page->index)) {
 471                zero_user(page, 0, PAGE_SIZE);
 472                SetPageUptodate(page);
 473                return 0;
 474        }
 475
 476        error = gfs2_meta_inode_buffer(ip, &dibh);
 477        if (error)
 478                return error;
 479
 480        kaddr = kmap_atomic(page);
 481        if (dsize > gfs2_max_stuffed_size(ip))
 482                dsize = gfs2_max_stuffed_size(ip);
 483        memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
 484        memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
 485        kunmap_atomic(kaddr);
 486        flush_dcache_page(page);
 487        brelse(dibh);
 488        SetPageUptodate(page);
 489
 490        return 0;
 491}
 492
 493
 494/**
 495 * __gfs2_readpage - readpage
 496 * @file: The file to read a page for
 497 * @page: The page to read
 498 *
 499 * This is the core of gfs2's readpage. It's used by the internal file
 500 * reading code as in that case we already hold the glock. Also it's
 501 * called by gfs2_readpage() once the required lock has been granted.
 502 */
 503
 504static int __gfs2_readpage(void *file, struct page *page)
 505{
 506        struct gfs2_inode *ip = GFS2_I(page->mapping->host);
 507        struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
 508
 509        int error;
 510
 511        if (i_blocksize(page->mapping->host) == PAGE_SIZE &&
 512            !page_has_buffers(page)) {
 513                error = iomap_readpage(page, &gfs2_iomap_ops);
 514        } else if (gfs2_is_stuffed(ip)) {
 515                error = stuffed_readpage(ip, page);
 516                unlock_page(page);
 517        } else {
 518                error = mpage_readpage(page, gfs2_block_map);
 519        }
 520
 521        if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
 522                return -EIO;
 523
 524        return error;
 525}
 526
 527/**
 528 * gfs2_readpage - read a page of a file
 529 * @file: The file to read
 530 * @page: The page of the file
 531 *
 532 * This deals with the locking required. We have to unlock and
 533 * relock the page in order to get the locking in the right
 534 * order.
 535 */
 536
 537static int gfs2_readpage(struct file *file, struct page *page)
 538{
 539        struct address_space *mapping = page->mapping;
 540        struct gfs2_inode *ip = GFS2_I(mapping->host);
 541        struct gfs2_holder gh;
 542        int error;
 543
 544        unlock_page(page);
 545        gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
 546        error = gfs2_glock_nq(&gh);
 547        if (unlikely(error))
 548                goto out;
 549        error = AOP_TRUNCATED_PAGE;
 550        lock_page(page);
 551        if (page->mapping == mapping && !PageUptodate(page))
 552                error = __gfs2_readpage(file, page);
 553        else
 554                unlock_page(page);
 555        gfs2_glock_dq(&gh);
 556out:
 557        gfs2_holder_uninit(&gh);
 558        if (error && error != AOP_TRUNCATED_PAGE)
 559                lock_page(page);
 560        return error;
 561}
 562
 563/**
 564 * gfs2_internal_read - read an internal file
 565 * @ip: The gfs2 inode
 566 * @buf: The buffer to fill
 567 * @pos: The file position
 568 * @size: The amount to read
 569 *
 570 */
 571
 572int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos,
 573                       unsigned size)
 574{
 575        struct address_space *mapping = ip->i_inode.i_mapping;
 576        unsigned long index = *pos / PAGE_SIZE;
 577        unsigned offset = *pos & (PAGE_SIZE - 1);
 578        unsigned copied = 0;
 579        unsigned amt;
 580        struct page *page;
 581        void *p;
 582
 583        do {
 584                amt = size - copied;
 585                if (offset + size > PAGE_SIZE)
 586                        amt = PAGE_SIZE - offset;
 587                page = read_cache_page(mapping, index, __gfs2_readpage, NULL);
 588                if (IS_ERR(page))
 589                        return PTR_ERR(page);
 590                p = kmap_atomic(page);
 591                memcpy(buf + copied, p + offset, amt);
 592                kunmap_atomic(p);
 593                put_page(page);
 594                copied += amt;
 595                index++;
 596                offset = 0;
 597        } while(copied < size);
 598        (*pos) += size;
 599        return size;
 600}
 601
 602/**
 603 * gfs2_readpages - Read a bunch of pages at once
 604 * @file: The file to read from
 605 * @mapping: Address space info
 606 * @pages: List of pages to read
 607 * @nr_pages: Number of pages to read
 608 *
 609 * Some notes:
 610 * 1. This is only for readahead, so we can simply ignore any things
 611 *    which are slightly inconvenient (such as locking conflicts between
 612 *    the page lock and the glock) and return having done no I/O. Its
 613 *    obviously not something we'd want to do on too regular a basis.
 614 *    Any I/O we ignore at this time will be done via readpage later.
 615 * 2. We don't handle stuffed files here we let readpage do the honours.
 616 * 3. mpage_readpages() does most of the heavy lifting in the common case.
 617 * 4. gfs2_block_map() is relied upon to set BH_Boundary in the right places.
 618 */
 619
 620static int gfs2_readpages(struct file *file, struct address_space *mapping,
 621                          struct list_head *pages, unsigned nr_pages)
 622{
 623        struct inode *inode = mapping->host;
 624        struct gfs2_inode *ip = GFS2_I(inode);
 625        struct gfs2_sbd *sdp = GFS2_SB(inode);
 626        struct gfs2_holder gh;
 627        int ret;
 628
 629        gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
 630        ret = gfs2_glock_nq(&gh);
 631        if (unlikely(ret))
 632                goto out_uninit;
 633        if (!gfs2_is_stuffed(ip))
 634                ret = mpage_readpages(mapping, pages, nr_pages, gfs2_block_map);
 635        gfs2_glock_dq(&gh);
 636out_uninit:
 637        gfs2_holder_uninit(&gh);
 638        if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
 639                ret = -EIO;
 640        return ret;
 641}
 642
 643/**
 644 * adjust_fs_space - Adjusts the free space available due to gfs2_grow
 645 * @inode: the rindex inode
 646 */
 647void adjust_fs_space(struct inode *inode)
 648{
 649        struct gfs2_sbd *sdp = GFS2_SB(inode);
 650        struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
 651        struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
 652        struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
 653        struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
 654        struct buffer_head *m_bh, *l_bh;
 655        u64 fs_total, new_free;
 656
 657        if (gfs2_trans_begin(sdp, 2 * RES_STATFS, 0) != 0)
 658                return;
 659
 660        /* Total up the file system space, according to the latest rindex. */
 661        fs_total = gfs2_ri_total(sdp);
 662        if (gfs2_meta_inode_buffer(m_ip, &m_bh) != 0)
 663                goto out;
 664
 665        spin_lock(&sdp->sd_statfs_spin);
 666        gfs2_statfs_change_in(m_sc, m_bh->b_data +
 667                              sizeof(struct gfs2_dinode));
 668        if (fs_total > (m_sc->sc_total + l_sc->sc_total))
 669                new_free = fs_total - (m_sc->sc_total + l_sc->sc_total);
 670        else
 671                new_free = 0;
 672        spin_unlock(&sdp->sd_statfs_spin);
 673        fs_warn(sdp, "File system extended by %llu blocks.\n",
 674                (unsigned long long)new_free);
 675        gfs2_statfs_change(sdp, new_free, new_free, 0);
 676
 677        if (gfs2_meta_inode_buffer(l_ip, &l_bh) != 0)
 678                goto out2;
 679        update_statfs(sdp, m_bh, l_bh);
 680        brelse(l_bh);
 681out2:
 682        brelse(m_bh);
 683out:
 684        sdp->sd_rindex_uptodate = 0;
 685        gfs2_trans_end(sdp);
 686}
 687
 688/**
 689 * gfs2_stuffed_write_end - Write end for stuffed files
 690 * @inode: The inode
 691 * @dibh: The buffer_head containing the on-disk inode
 692 * @pos: The file position
 693 * @copied: How much was actually copied by the VFS
 694 * @page: The page
 695 *
 696 * This copies the data from the page into the inode block after
 697 * the inode data structure itself.
 698 *
 699 * Returns: copied bytes or errno
 700 */
 701int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
 702                           loff_t pos, unsigned copied,
 703                           struct page *page)
 704{
 705        struct gfs2_inode *ip = GFS2_I(inode);
 706        u64 to = pos + copied;
 707        void *kaddr;
 708        unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);
 709
 710        BUG_ON(pos + copied > gfs2_max_stuffed_size(ip));
 711
 712        kaddr = kmap_atomic(page);
 713        memcpy(buf + pos, kaddr + pos, copied);
 714        flush_dcache_page(page);
 715        kunmap_atomic(kaddr);
 716
 717        WARN_ON(!PageUptodate(page));
 718        unlock_page(page);
 719        put_page(page);
 720
 721        if (copied) {
 722                if (inode->i_size < to)
 723                        i_size_write(inode, to);
 724                mark_inode_dirty(inode);
 725        }
 726        return copied;
 727}
 728
 729/**
 730 * jdata_set_page_dirty - Page dirtying function
 731 * @page: The page to dirty
 732 *
 733 * Returns: 1 if it dirtyed the page, or 0 otherwise
 734 */
 735 
 736static int jdata_set_page_dirty(struct page *page)
 737{
 738        SetPageChecked(page);
 739        return __set_page_dirty_buffers(page);
 740}
 741
 742/**
 743 * gfs2_bmap - Block map function
 744 * @mapping: Address space info
 745 * @lblock: The block to map
 746 *
 747 * Returns: The disk address for the block or 0 on hole or error
 748 */
 749
 750static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
 751{
 752        struct gfs2_inode *ip = GFS2_I(mapping->host);
 753        struct gfs2_holder i_gh;
 754        sector_t dblock = 0;
 755        int error;
 756
 757        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
 758        if (error)
 759                return 0;
 760
 761        if (!gfs2_is_stuffed(ip))
 762                dblock = generic_block_bmap(mapping, lblock, gfs2_block_map);
 763
 764        gfs2_glock_dq_uninit(&i_gh);
 765
 766        return dblock;
 767}
 768
 769static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh)
 770{
 771        struct gfs2_bufdata *bd;
 772
 773        lock_buffer(bh);
 774        gfs2_log_lock(sdp);
 775        clear_buffer_dirty(bh);
 776        bd = bh->b_private;
 777        if (bd) {
 778                if (!list_empty(&bd->bd_list) && !buffer_pinned(bh))
 779                        list_del_init(&bd->bd_list);
 780                else
 781                        gfs2_remove_from_journal(bh, REMOVE_JDATA);
 782        }
 783        bh->b_bdev = NULL;
 784        clear_buffer_mapped(bh);
 785        clear_buffer_req(bh);
 786        clear_buffer_new(bh);
 787        gfs2_log_unlock(sdp);
 788        unlock_buffer(bh);
 789}
 790
 791static void gfs2_invalidatepage(struct page *page, unsigned int offset,
 792                                unsigned int length)
 793{
 794        struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
 795        unsigned int stop = offset + length;
 796        int partial_page = (offset || length < PAGE_SIZE);
 797        struct buffer_head *bh, *head;
 798        unsigned long pos = 0;
 799
 800        BUG_ON(!PageLocked(page));
 801        if (!partial_page)
 802                ClearPageChecked(page);
 803        if (!page_has_buffers(page))
 804                goto out;
 805
 806        bh = head = page_buffers(page);
 807        do {
 808                if (pos + bh->b_size > stop)
 809                        return;
 810
 811                if (offset <= pos)
 812                        gfs2_discard(sdp, bh);
 813                pos += bh->b_size;
 814                bh = bh->b_this_page;
 815        } while (bh != head);
 816out:
 817        if (!partial_page)
 818                try_to_release_page(page, 0);
 819}
 820
 821/**
 822 * gfs2_releasepage - free the metadata associated with a page
 823 * @page: the page that's being released
 824 * @gfp_mask: passed from Linux VFS, ignored by us
 825 *
 826 * Calls try_to_free_buffers() to free the buffers and put the page if the
 827 * buffers can be released.
 828 *
 829 * Returns: 1 if the page was put or else 0
 830 */
 831
 832int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
 833{
 834        struct address_space *mapping = page->mapping;
 835        struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
 836        struct buffer_head *bh, *head;
 837        struct gfs2_bufdata *bd;
 838
 839        if (!page_has_buffers(page))
 840                return 0;
 841
 842        /*
 843         * From xfs_vm_releasepage: mm accommodates an old ext3 case where
 844         * clean pages might not have had the dirty bit cleared.  Thus, it can
 845         * send actual dirty pages to ->releasepage() via shrink_active_list().
 846         *
 847         * As a workaround, we skip pages that contain dirty buffers below.
 848         * Once ->releasepage isn't called on dirty pages anymore, we can warn
 849         * on dirty buffers like we used to here again.
 850         */
 851
 852        gfs2_log_lock(sdp);
 853        spin_lock(&sdp->sd_ail_lock);
 854        head = bh = page_buffers(page);
 855        do {
 856                if (atomic_read(&bh->b_count))
 857                        goto cannot_release;
 858                bd = bh->b_private;
 859                if (bd && bd->bd_tr)
 860                        goto cannot_release;
 861                if (buffer_dirty(bh) || WARN_ON(buffer_pinned(bh)))
 862                        goto cannot_release;
 863                bh = bh->b_this_page;
 864        } while(bh != head);
 865        spin_unlock(&sdp->sd_ail_lock);
 866
 867        head = bh = page_buffers(page);
 868        do {
 869                bd = bh->b_private;
 870                if (bd) {
 871                        gfs2_assert_warn(sdp, bd->bd_bh == bh);
 872                        if (!list_empty(&bd->bd_list))
 873                                list_del_init(&bd->bd_list);
 874                        bd->bd_bh = NULL;
 875                        bh->b_private = NULL;
 876                        kmem_cache_free(gfs2_bufdata_cachep, bd);
 877                }
 878
 879                bh = bh->b_this_page;
 880        } while (bh != head);
 881        gfs2_log_unlock(sdp);
 882
 883        return try_to_free_buffers(page);
 884
 885cannot_release:
 886        spin_unlock(&sdp->sd_ail_lock);
 887        gfs2_log_unlock(sdp);
 888        return 0;
 889}
 890
 891static const struct address_space_operations gfs2_writeback_aops = {
 892        .writepage = gfs2_writepage,
 893        .writepages = gfs2_writepages,
 894        .readpage = gfs2_readpage,
 895        .readpages = gfs2_readpages,
 896        .bmap = gfs2_bmap,
 897        .invalidatepage = gfs2_invalidatepage,
 898        .releasepage = gfs2_releasepage,
 899        .direct_IO = noop_direct_IO,
 900        .migratepage = buffer_migrate_page,
 901        .is_partially_uptodate = block_is_partially_uptodate,
 902        .error_remove_page = generic_error_remove_page,
 903};
 904
 905static const struct address_space_operations gfs2_ordered_aops = {
 906        .writepage = gfs2_writepage,
 907        .writepages = gfs2_writepages,
 908        .readpage = gfs2_readpage,
 909        .readpages = gfs2_readpages,
 910        .set_page_dirty = __set_page_dirty_buffers,
 911        .bmap = gfs2_bmap,
 912        .invalidatepage = gfs2_invalidatepage,
 913        .releasepage = gfs2_releasepage,
 914        .direct_IO = noop_direct_IO,
 915        .migratepage = buffer_migrate_page,
 916        .is_partially_uptodate = block_is_partially_uptodate,
 917        .error_remove_page = generic_error_remove_page,
 918};
 919
 920static const struct address_space_operations gfs2_jdata_aops = {
 921        .writepage = gfs2_jdata_writepage,
 922        .writepages = gfs2_jdata_writepages,
 923        .readpage = gfs2_readpage,
 924        .readpages = gfs2_readpages,
 925        .set_page_dirty = jdata_set_page_dirty,
 926        .bmap = gfs2_bmap,
 927        .invalidatepage = gfs2_invalidatepage,
 928        .releasepage = gfs2_releasepage,
 929        .is_partially_uptodate = block_is_partially_uptodate,
 930        .error_remove_page = generic_error_remove_page,
 931};
 932
 933void gfs2_set_aops(struct inode *inode)
 934{
 935        struct gfs2_inode *ip = GFS2_I(inode);
 936        struct gfs2_sbd *sdp = GFS2_SB(inode);
 937
 938        if (gfs2_is_jdata(ip))
 939                inode->i_mapping->a_ops = &gfs2_jdata_aops;
 940        else if (gfs2_is_writeback(sdp))
 941                inode->i_mapping->a_ops = &gfs2_writeback_aops;
 942        else if (gfs2_is_ordered(sdp))
 943                inode->i_mapping->a_ops = &gfs2_ordered_aops;
 944        else
 945                BUG();
 946}
 947