linux/mm/truncate.c
/*
 * mm/truncate.c - code for taking down pages from address_spaces
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 10Sep2002    akpm@zip.com.au
 *              Initial version.
 */

#include <linux/kernel.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/pagevec.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/buffer_head.h>  /* grr. try_to_release_page,
                                   do_invalidatepage */


/**
 * do_invalidatepage - invalidate part or all of a page
 * @page: the page which is affected
 * @offset: the byte offset of the truncation point within the page
 *
 * do_invalidatepage() is called when all or part of the page has become
 * invalidated by a truncate operation.
 *
 * do_invalidatepage() does not have to release all buffers, but it must
 * ensure that no dirty buffer is left outside @offset and that no I/O
 * is underway against any of the blocks which are outside the truncation
 * point, because the caller is about to free (and possibly reuse) those
 * blocks on-disk.
 */
void do_invalidatepage(struct page *page, unsigned long offset)
{
        void (*invalidatepage)(struct page *, unsigned long);
        invalidatepage = page->mapping->a_ops->invalidatepage;
#ifdef CONFIG_BLOCK
        if (!invalidatepage)
                invalidatepage = block_invalidatepage;
#endif
        if (invalidatepage)
                (*invalidatepage)(page, offset);
}
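
/*
 * Illustrative sketch, not part of this file: a filesystem that keeps
 * per-page private state can supply its own ->invalidatepage in its
 * address_space_operations; when the hook is absent, do_invalidatepage()
 * above falls back to block_invalidatepage() under CONFIG_BLOCK.  The
 * "example_" names below are hypothetical.
 */
#if 0
static void example_invalidatepage(struct page *page, unsigned long offset)
{
        /* Tear down filesystem-private state beyond @offset first ... */

        /* ... then let the generic buffer code discard the buffer_heads. */
        block_invalidatepage(page, offset);
}

static const struct address_space_operations example_aops = {
        .invalidatepage = example_invalidatepage,
        /* .readpage, .writepage, ... */
};
#endif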

static inline void truncate_partial_page(struct page *page, unsigned partial)
{
        zero_user_page(page, partial, PAGE_CACHE_SIZE - partial, KM_USER0);
        if (PagePrivate(page))
                do_invalidatepage(page, partial);
}

/*
 * This cancels just the dirty bit on the kernel page itself; it
 * does NOT actually remove dirty bits on any mmap's that may be
 * around. It also leaves the page tagged dirty, so any sync
 * activity will still find it on the dirty lists, and in particular,
 * clear_page_dirty_for_io() will still look at the dirty bits in
 * the VM.
 *
 * Doing this should *normally* only ever be done when a page
 * is truncated, and is not actually mapped anywhere at all. However,
 * fs/buffer.c does this when it notices that somebody has cleaned
 * out all the buffers on a page without actually doing it through
 * the VM. Can you say "ext3 is horribly ugly"? Thought you could.
 */
void cancel_dirty_page(struct page *page, unsigned int account_size)
{
        if (TestClearPageDirty(page)) {
                struct address_space *mapping = page->mapping;
                if (mapping && mapping_cap_account_dirty(mapping)) {
                        dec_zone_page_state(page, NR_FILE_DIRTY);
                        dec_bdi_stat(mapping->backing_dev_info,
                                        BDI_RECLAIMABLE);
                        if (account_size)
                                task_io_account_cancelled_write(account_size);
                }
        }
}
EXPORT_SYMBOL(cancel_dirty_page);
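
/*
 * Illustrative sketch, not part of this file: a filesystem that has torn
 * down a page's private state by hand (rather than via the normal VM
 * writeback path) can cancel the page's dirty accounting like this.  The
 * helper name is hypothetical; the page is assumed to be locked and no
 * longer mapped anywhere.
 */
#if 0
static void example_discard_dirty_page(struct page *page)
{
        /* Nothing will ever write this page back, so undo the accounting. */
        cancel_dirty_page(page, PAGE_CACHE_SIZE);
}
#endif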

/*
 * If truncate cannot remove the fs-private metadata from the page, the page
 * becomes anonymous.  It will be left on the LRU and may even be mapped into
 * user pagetables if we're racing with filemap_fault().
 *
 * We need to bail out if page->mapping is no longer equal to the original
 * mapping.  This happens a) when the VM reclaimed the page while we waited on
 * its lock, b) when a concurrent invalidate_mapping_pages got there first and
 * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
 */
static void
truncate_complete_page(struct address_space *mapping, struct page *page)
{
        if (page->mapping != mapping)
                return;

        cancel_dirty_page(page, PAGE_CACHE_SIZE);

        if (PagePrivate(page))
                do_invalidatepage(page, 0);

        remove_from_page_cache(page);
        ClearPageUptodate(page);
        ClearPageMappedToDisk(page);
        page_cache_release(page);       /* pagecache ref */
}

/*
 * This is for invalidate_mapping_pages().  That function can be called at
 * any time, and is not supposed to throw away dirty pages.  But pages can
 * be marked dirty at any time too, so use remove_mapping which safely
 * discards clean, unused pages.
 *
 * Returns non-zero if the page was successfully invalidated.
 */
static int
invalidate_complete_page(struct address_space *mapping, struct page *page)
{
        int ret;

        if (page->mapping != mapping)
                return 0;

        if (PagePrivate(page) && !try_to_release_page(page, 0))
                return 0;

        ret = remove_mapping(mapping, page);

        return ret;
}

/**
 * truncate_inode_pages_range - truncate range of pages specified by start
 * and end byte offsets
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 * @lend: offset to which to truncate
 *
 * Truncate the page cache, removing the pages that are between the
 * specified offsets (and zeroing out the partial page if lstart is not
 * page aligned).
 *
 * Truncate takes two passes - the first pass is nonblocking.  It will not
 * block on page locks and it will not block on writeback.  The second pass
 * will wait.  This is to prevent as much IO as possible in the affected region.
 * The first pass will remove most pages, so the search cost of the second pass
 * is low.
 *
 * When looking at page->index outside the page lock we need to be careful to
 * copy it into a local to avoid races (it could change at any time).
 *
 * We pass down the cache-hot hint to the page freeing code.  Even if the
 * mapping is large, it is probably the case that the final pages are the most
 * recently touched, and freeing happens in ascending file offset order.
 */
void truncate_inode_pages_range(struct address_space *mapping,
                                loff_t lstart, loff_t lend)
{
        const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
        pgoff_t end;
        const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
        struct pagevec pvec;
        pgoff_t next;
        int i;

        if (mapping->nrpages == 0)
                return;

        BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
        end = (lend >> PAGE_CACHE_SHIFT);

        pagevec_init(&pvec, 0);
        next = start;
        while (next <= end &&
               pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];
                        pgoff_t page_index = page->index;

                        if (page_index > end) {
                                next = page_index;
                                break;
                        }

                        if (page_index > next)
                                next = page_index;
                        next++;
                        if (TestSetPageLocked(page))
                                continue;
                        if (PageWriteback(page)) {
                                unlock_page(page);
                                continue;
                        }
                        if (page_mapped(page)) {
                                unmap_mapping_range(mapping,
                                  (loff_t)page_index<<PAGE_CACHE_SHIFT,
                                  PAGE_CACHE_SIZE, 0);
                        }
                        truncate_complete_page(mapping, page);
                        unlock_page(page);
                }
                pagevec_release(&pvec);
                cond_resched();
        }

        if (partial) {
                struct page *page = find_lock_page(mapping, start - 1);
                if (page) {
                        wait_on_page_writeback(page);
                        truncate_partial_page(page, partial);
                        unlock_page(page);
                        page_cache_release(page);
                }
        }

        next = start;
        for ( ; ; ) {
                cond_resched();
                if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
                        if (next == start)
                                break;
                        next = start;
                        continue;
                }
                if (pvec.pages[0]->index > end) {
                        pagevec_release(&pvec);
                        break;
                }
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];

                        if (page->index > end)
                                break;
                        lock_page(page);
                        wait_on_page_writeback(page);
                        if (page_mapped(page)) {
                                unmap_mapping_range(mapping,
                                  (loff_t)page->index<<PAGE_CACHE_SHIFT,
                                  PAGE_CACHE_SIZE, 0);
                        }
                        if (page->index > next)
                                next = page->index;
                        next++;
                        truncate_complete_page(mapping, page);
                        unlock_page(page);
                }
                pagevec_release(&pvec);
        }
}
EXPORT_SYMBOL(truncate_inode_pages_range);
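
/*
 * Illustrative sketch, not part of this file: dropping the page cache for a
 * byte range.  Note the BUG_ON above: @lend must point at the last byte of
 * a page (lend % PAGE_CACHE_SIZE == PAGE_CACHE_SIZE - 1), while @lstart may
 * be unaligned and only has its partial page zeroed.  The helper name is
 * hypothetical and @bytes is assumed to be non-zero.
 */
#if 0
static void example_truncate_pagecache_range(struct inode *inode,
                                             loff_t start, loff_t bytes)
{
        loff_t end = start + bytes - 1;

        /* Round @end up to the last byte of its page to satisfy the BUG_ON. */
        end |= PAGE_CACHE_SIZE - 1;
        truncate_inode_pages_range(inode->i_mapping, start, end);
}
#endif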

/**
 * truncate_inode_pages - truncate *all* the pages from an offset
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 *
 * Called under (and serialised by) inode->i_mutex.
 */
void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
{
        truncate_inode_pages_range(mapping, lstart, (loff_t)-1);
}
EXPORT_SYMBOL(truncate_inode_pages);
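
/*
 * Illustrative sketch, not part of this file: the two classic callers are
 * inode teardown (drop every page) and file truncation (drop everything
 * past the new size).  Both run under inode->i_mutex or with the inode
 * otherwise unreachable.  The helper names are hypothetical.
 */
#if 0
static void example_clear_inode_pagecache(struct inode *inode)
{
        truncate_inode_pages(&inode->i_data, 0);
}

static void example_truncate_pagecache(struct inode *inode, loff_t newsize)
{
        truncate_inode_pages(inode->i_mapping, newsize);
}
#endif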

unsigned long __invalidate_mapping_pages(struct address_space *mapping,
                                pgoff_t start, pgoff_t end, bool be_atomic)
{
        struct pagevec pvec;
        pgoff_t next = start;
        unsigned long ret = 0;
        int i;

        pagevec_init(&pvec, 0);
        while (next <= end &&
                        pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];
                        pgoff_t index;
                        int lock_failed;

                        lock_failed = TestSetPageLocked(page);

                        /*
                         * We really shouldn't be looking at the ->index of an
                         * unlocked page.  But we're not allowed to lock these
                         * pages.  So we rely upon nobody altering the ->index
                         * of this (pinned-by-us) page.
                         */
                        index = page->index;
                        if (index > next)
                                next = index;
                        next++;
                        if (lock_failed)
                                continue;

                        if (PageDirty(page) || PageWriteback(page))
                                goto unlock;
                        if (page_mapped(page))
                                goto unlock;
                        ret += invalidate_complete_page(mapping, page);
unlock:
                        unlock_page(page);
                        if (next > end)
                                break;
                }
                pagevec_release(&pvec);
                if (likely(!be_atomic))
                        cond_resched();
        }
        return ret;
}

/**
 * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
 * @mapping: the address_space which holds the pages to invalidate
 * @start: the page offset 'from' which to invalidate
 * @end: the page offset 'to' which to invalidate (inclusive)
 *
 * This function only removes the unlocked pages; if you want to
 * remove all the pages of one inode, you must call truncate_inode_pages.
 *
 * invalidate_mapping_pages() will not block on IO activity. It will not
 * invalidate pages which are dirty, locked, under writeback or mapped into
 * pagetables.
 */
unsigned long invalidate_mapping_pages(struct address_space *mapping,
                                pgoff_t start, pgoff_t end)
{
        return __invalidate_mapping_pages(mapping, start, end, false);
}
EXPORT_SYMBOL(invalidate_mapping_pages);
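
/*
 * Illustrative sketch, not part of this file: a "drop what you easily can"
 * pass over one inode, in the style of /proc/sys/vm/drop_caches.  Dirty,
 * locked, mapped and writeback pages are simply skipped, so this is only
 * best-effort.  The helper name is hypothetical.
 */
#if 0
static unsigned long example_drop_clean_pagecache(struct inode *inode)
{
        return invalidate_mapping_pages(inode->i_mapping, 0, (pgoff_t)-1);
}
#endif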

/*
 * This is like invalidate_complete_page(), except it ignores the page's
 * refcount.  We do this because invalidate_inode_pages2() needs stronger
 * invalidation guarantees, and cannot afford to leave pages behind because
 * shrink_page_list() has a temp ref on them, or because they're transiently
 * sitting in the lru_cache_add() pagevecs.
 */
static int
invalidate_complete_page2(struct address_space *mapping, struct page *page)
{
        if (page->mapping != mapping)
                return 0;

        if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL))
                return 0;

        write_lock_irq(&mapping->tree_lock);
        if (PageDirty(page))
                goto failed;

        BUG_ON(PagePrivate(page));
        __remove_from_page_cache(page);
        write_unlock_irq(&mapping->tree_lock);
        ClearPageUptodate(page);
        page_cache_release(page);       /* pagecache ref */
        return 1;
failed:
        write_unlock_irq(&mapping->tree_lock);
        return 0;
}

static int do_launder_page(struct address_space *mapping, struct page *page)
{
        if (!PageDirty(page))
                return 0;
        if (page->mapping != mapping || mapping->a_ops->launder_page == NULL)
                return 0;
        return mapping->a_ops->launder_page(page);
}
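
/*
 * Illustrative sketch, not part of this file: a filesystem that wants
 * invalidate_inode_pages2*() to write a dirty page out (rather than fail
 * with -EIO) supplies ->launder_page, which is called with the page locked
 * and should return 0 only once the page is clean.  The "example_" names
 * are hypothetical.
 */
#if 0
static int example_launder_page(struct page *page)
{
        /*
         * A real implementation (NFS is an in-tree user) writes the locked,
         * dirty page back here and returns the I/O result.
         */
        return 0;
}

static const struct address_space_operations example_launder_aops = {
        .launder_page = example_launder_page,
};
#endif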

/**
 * invalidate_inode_pages2_range - remove range of pages from an address_space
 * @mapping: the address_space
 * @start: the page offset 'from' which to invalidate
 * @end: the page offset 'to' which to invalidate (inclusive)
 *
 * Any pages which are found to be mapped into pagetables are unmapped prior to
 * invalidation.
 *
 * Returns -EIO if any pages could not be invalidated.
 */
int invalidate_inode_pages2_range(struct address_space *mapping,
                                  pgoff_t start, pgoff_t end)
{
        struct pagevec pvec;
        pgoff_t next;
        int i;
        int ret = 0;
        int did_range_unmap = 0;
        int wrapped = 0;

        pagevec_init(&pvec, 0);
        next = start;
        while (next <= end && !wrapped &&
                pagevec_lookup(&pvec, mapping, next,
                        min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];
                        pgoff_t page_index;

                        lock_page(page);
                        if (page->mapping != mapping) {
                                unlock_page(page);
                                continue;
                        }
                        page_index = page->index;
                        next = page_index + 1;
                        if (next == 0)
                                wrapped = 1;
                        if (page_index > end) {
                                unlock_page(page);
                                break;
                        }
                        wait_on_page_writeback(page);
                        if (page_mapped(page)) {
                                if (!did_range_unmap) {
                                        /*
                                         * Zap the rest of the file in one hit.
                                         */
                                        unmap_mapping_range(mapping,
                                           (loff_t)page_index<<PAGE_CACHE_SHIFT,
                                           (loff_t)(end - page_index + 1)
                                                        << PAGE_CACHE_SHIFT,
                                            0);
                                        did_range_unmap = 1;
                                } else {
                                        /*
                                         * Just zap this page
                                         */
                                        unmap_mapping_range(mapping,
                                          (loff_t)page_index<<PAGE_CACHE_SHIFT,
                                          PAGE_CACHE_SIZE, 0);
                                }
                        }
                        BUG_ON(page_mapped(page));
                        ret = do_launder_page(mapping, page);
                        if (ret == 0 && !invalidate_complete_page2(mapping, page))
                                ret = -EIO;
                        unlock_page(page);
                }
                pagevec_release(&pvec);
                cond_resched();
        }
        return ret;
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);
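
/*
 * Illustrative sketch, not part of this file: a direct-IO style write path
 * needs cached pages covering the written byte range to be dropped so that
 * later buffered reads do not see stale data.  A sketch of that call,
 * assuming @pos/@len describe the bytes just written and @len is non-zero
 * (helper name hypothetical):
 */
#if 0
static int example_invalidate_written_range(struct address_space *mapping,
                                            loff_t pos, size_t len)
{
        pgoff_t first = pos >> PAGE_CACHE_SHIFT;
        pgoff_t last = (pos + len - 1) >> PAGE_CACHE_SHIFT;

        return invalidate_inode_pages2_range(mapping, first, last);
}
#endif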

/**
 * invalidate_inode_pages2 - remove all pages from an address_space
 * @mapping: the address_space
 *
 * Any pages which are found to be mapped into pagetables are unmapped prior to
 * invalidation.
 *
 * Returns -EIO if any pages could not be invalidated.
 */
int invalidate_inode_pages2(struct address_space *mapping)
{
        return invalidate_inode_pages2_range(mapping, 0, -1);
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
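
/*
 * Illustrative sketch, not part of this file: when a network filesystem
 * notices that the file changed on the server, it flushes its own dirty
 * data and then throws away the whole local page cache for that inode.
 * The helper name is hypothetical.
 */
#if 0
static int example_remote_file_changed(struct inode *inode)
{
        int ret;

        ret = filemap_write_and_wait(inode->i_mapping);
        if (ret == 0)
                ret = invalidate_inode_pages2(inode->i_mapping);
        return ret;
}
#endif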