linux/include/linux/pagemap.h
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_PAGEMAP_H
#define _LINUX_PAGEMAP_H

/*
 * Copyright 1995 Linus Torvalds
 */
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/compiler.h>
#include <linux/uaccess.h>
#include <linux/gfp.h>
#include <linux/bitops.h>
#include <linux/hardirq.h> /* for in_interrupt() */
#include <linux/hugetlb_inline.h>

struct pagevec;

/*
 * Bits in mapping->flags.
 */
enum mapping_flags {
	AS_EIO		= 0,	/* IO error on async write */
	AS_ENOSPC	= 1,	/* ENOSPC on async write */
	AS_MM_ALL_LOCKS	= 2,	/* under mm_take_all_locks() */
	AS_UNEVICTABLE	= 3,	/* e.g., ramdisk, SHM_LOCK */
	AS_EXITING	= 4,	/* final truncate in progress */
	/* writeback related tags are not used */
	AS_NO_WRITEBACK_TAGS = 5,
};

/**
 * mapping_set_error - record a writeback error in the address_space
 * @mapping: the mapping in which an error should be set
 * @error: the error to set in the mapping
 *
 * When writeback fails in some way, we must record that error so that
 * userspace can be informed when fsync and the like are called.  We endeavor
 * to report errors on any file that was open at the time of the error.  Some
 * internal callers also need to know when writeback errors have occurred.
 *
 * When a writeback error occurs, most filesystems will want to call
 * mapping_set_error to record the error in the mapping so that it can be
 * reported when the application calls fsync(2).
 */
static inline void mapping_set_error(struct address_space *mapping, int error)
{
	if (likely(!error))
		return;

	/* Record in wb_err for checkers using errseq_t based tracking */
	filemap_set_wb_err(mapping, error);

	/* Record it in flags for now, for legacy callers */
	if (error == -ENOSPC)
		set_bit(AS_ENOSPC, &mapping->flags);
	else
		set_bit(AS_EIO, &mapping->flags);
}
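
/*
 * Example (illustrative sketch, not part of the upstream header): a
 * filesystem's write-back completion path would typically record a failed
 * write like this, so that a later fsync(2) on any open file sees it.  The
 * helper name and arguments here are hypothetical.
 *
 *	static void example_end_writeback(struct page *page, int err)
 *	{
 *		if (err)
 *			mapping_set_error(page->mapping, err);
 *		end_page_writeback(page);
 *	}
 */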

static inline void mapping_set_unevictable(struct address_space *mapping)
{
	set_bit(AS_UNEVICTABLE, &mapping->flags);
}

static inline void mapping_clear_unevictable(struct address_space *mapping)
{
	clear_bit(AS_UNEVICTABLE, &mapping->flags);
}

static inline bool mapping_unevictable(struct address_space *mapping)
{
	return mapping && test_bit(AS_UNEVICTABLE, &mapping->flags);
}

static inline void mapping_set_exiting(struct address_space *mapping)
{
	set_bit(AS_EXITING, &mapping->flags);
}

static inline int mapping_exiting(struct address_space *mapping)
{
	return test_bit(AS_EXITING, &mapping->flags);
}

static inline void mapping_set_no_writeback_tags(struct address_space *mapping)
{
	set_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags);
}

static inline int mapping_use_writeback_tags(struct address_space *mapping)
{
	return !test_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags);
}

static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
{
	return mapping->gfp_mask;
}

/* Restricts the given gfp_mask to what the mapping allows. */
static inline gfp_t mapping_gfp_constraint(struct address_space *mapping,
		gfp_t gfp_mask)
{
	return mapping_gfp_mask(mapping) & gfp_mask;
}

/*
 * This is non-atomic.  Only to be used before the mapping is activated.
 * Probably needs a barrier...
 */
static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
{
	m->gfp_mask = mask;
}

void release_pages(struct page **pages, int nr);

/*
 * Speculatively take a reference to a page.
 * If the page is free (_refcount == 0), then _refcount is untouched, and 0
 * is returned. Otherwise, _refcount is incremented by 1 and 1 is returned.
 *
 * This function must be called inside the same rcu_read_lock() section as has
 * been used to look up the page in the pagecache radix-tree (or page table):
 * this allows allocators to use a synchronize_rcu() to stabilize _refcount.
 *
 * Unless an RCU grace period has passed, the count of all pages coming out
 * of the allocator must be considered unstable. page_count may return higher
 * than expected, and put_page must be able to do the right thing when the
 * page has been finished with, no matter what it is subsequently allocated
 * for (because put_page is what is used here to drop an invalid speculative
 * reference).
 *
 * This is the interesting part of the lockless pagecache (and lockless
 * get_user_pages) locking protocol, where the lookup-side (e.g. find_get_page)
 * has the following pattern:
 * 1. find page in radix tree
 * 2. conditionally increment refcount
 * 3. check the page is still in pagecache (if no, goto 1)
 *
 * Remove-side that cares about stability of _refcount (e.g. reclaim) has the
 * following (with the i_pages lock held):
 * A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg)
 * B. remove page from pagecache
 * C. free the page
 *
 * There are 2 critical interleavings that matter:
 * - 2 runs before A: in this case, A sees elevated refcount and bails out
 * - A runs before 2: in this case, 2 sees zero refcount and retries;
 *   subsequently, B will complete and 1 will find no page, causing the
 *   lookup to return NULL.
 *
 * It is possible that between 1 and 2, the page is removed then the exact same
 * page is inserted into the same position in pagecache. That's OK: the
 * old find_get_page using a lock could equally have run before or after
 * such a re-insertion, depending on the order in which locks are granted.
 *
 * Lookups racing against pagecache insertion aren't a big problem: either 1
 * will find the page or it will not. Likewise, the old find_get_page could run
 * either before the insertion or afterwards, depending on timing.
 */
static inline int __page_cache_add_speculative(struct page *page, int count)
{
#ifdef CONFIG_TINY_RCU
# ifdef CONFIG_PREEMPT_COUNT
	VM_BUG_ON(!in_atomic() && !irqs_disabled());
# endif
	/*
	 * Preempt must be disabled here - we rely on rcu_read_lock doing
	 * this for us.
	 *
	 * Pagecache won't be truncated from interrupt context, so if we have
	 * found a page in the radix tree here, we have pinned its refcount by
	 * disabling preempt, and hence no need for the "speculative get" that
	 * SMP requires.
	 */
	VM_BUG_ON_PAGE(page_count(page) == 0, page);
	page_ref_add(page, count);

#else
	if (unlikely(!page_ref_add_unless(page, count, 0))) {
		/*
		 * Either the page has been freed, or will be freed.
		 * In either case, retry here and the caller should
		 * do the right thing (see comments above).
		 */
		return 0;
	}
#endif
	VM_BUG_ON_PAGE(PageTail(page), page);

	return 1;
}

static inline int page_cache_get_speculative(struct page *page)
{
	return __page_cache_add_speculative(page, 1);
}

static inline int page_cache_add_speculative(struct page *page, int count)
{
	return __page_cache_add_speculative(page, count);
}
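
/*
 * Example (illustrative sketch, not part of the upstream header): the
 * lookup-side pattern described above, roughly what find_get_page() does
 * internally.  The xas_load()/xas_reload() calls follow the XArray API;
 * treat the exact sequence as an approximation.
 *
 *	rcu_read_lock();
 * repeat:
 *	page = xas_load(&xas);				// 1. find page
 *	if (!page)
 *		goto out;
 *	if (!page_cache_get_speculative(page))		// 2. conditional get
 *		goto repeat;
 *	if (unlikely(page != xas_reload(&xas))) {	// 3. still in cache?
 *		put_page(page);
 *		goto repeat;
 *	}
 * out:
 *	rcu_read_unlock();
 */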

#ifdef CONFIG_NUMA
extern struct page *__page_cache_alloc(gfp_t gfp);
#else
static inline struct page *__page_cache_alloc(gfp_t gfp)
{
	return alloc_pages(gfp, 0);
}
#endif

static inline struct page *page_cache_alloc(struct address_space *x)
{
	return __page_cache_alloc(mapping_gfp_mask(x));
}

static inline gfp_t readahead_gfp_mask(struct address_space *x)
{
	return mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN;
}

typedef int filler_t(void *, struct page *);

pgoff_t page_cache_next_miss(struct address_space *mapping,
			     pgoff_t index, unsigned long max_scan);
pgoff_t page_cache_prev_miss(struct address_space *mapping,
			     pgoff_t index, unsigned long max_scan);

#define FGP_ACCESSED		0x00000001
#define FGP_LOCK		0x00000002
#define FGP_CREAT		0x00000004
#define FGP_WRITE		0x00000008
#define FGP_NOFS		0x00000010
#define FGP_NOWAIT		0x00000020
#define FGP_FOR_MMAP		0x00000040

struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
		int fgp_flags, gfp_t cache_gfp_mask);

/**
 * find_get_page - find and get a page reference
 * @mapping: the address_space to search
 * @offset: the page index
 *
 * Looks up the page cache slot at @mapping & @offset.  If there is a
 * page cache page, it is returned with an increased refcount.
 *
 * Otherwise, %NULL is returned.
 */
static inline struct page *find_get_page(struct address_space *mapping,
					pgoff_t offset)
{
	return pagecache_get_page(mapping, offset, 0, 0);
}
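
/*
 * Example (illustrative, not part of the upstream header): a caller that
 * only peeks at an already-cached page must drop the reference it was
 * handed.  @mapping and @index are assumed to be in scope.
 *
 *	struct page *page = find_get_page(mapping, index);
 *
 *	if (page) {
 *		// ... check PageUptodate(page) and read the data ...
 *		put_page(page);
 *	}
 */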

static inline struct page *find_get_page_flags(struct address_space *mapping,
					pgoff_t offset, int fgp_flags)
{
	return pagecache_get_page(mapping, offset, fgp_flags, 0);
}

/**
 * find_lock_page - locate, pin and lock a pagecache page
 * @mapping: the address_space to search
 * @offset: the page index
 *
 * Looks up the page cache slot at @mapping & @offset.  If there is a
 * page cache page, it is returned locked and with an increased
 * refcount.
 *
 * Otherwise, %NULL is returned.
 *
 * find_lock_page() may sleep.
 */
static inline struct page *find_lock_page(struct address_space *mapping,
					pgoff_t offset)
{
	return pagecache_get_page(mapping, offset, FGP_LOCK, 0);
}

/**
 * find_or_create_page - locate or add a pagecache page
 * @mapping: the page's address_space
 * @index: the page's index into the mapping
 * @gfp_mask: page allocation mode
 *
 * Looks up the page cache slot at @mapping & @index.  If there is a
 * page cache page, it is returned locked and with an increased
 * refcount.
 *
 * If the page is not present, a new page is allocated using @gfp_mask
 * and added to the page cache and the VM's LRU list.  The page is
 * returned locked and with an increased refcount.
 *
 * On memory exhaustion, %NULL is returned.
 *
 * find_or_create_page() may sleep, even if @gfp_mask specifies an
 * atomic allocation!
 */
static inline struct page *find_or_create_page(struct address_space *mapping,
					pgoff_t index, gfp_t gfp_mask)
{
	return pagecache_get_page(mapping, index,
					FGP_LOCK|FGP_ACCESSED|FGP_CREAT,
					gfp_mask);
}
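
/*
 * Example (illustrative sketch, not part of the upstream header): typical
 * use when a caller wants a locked, cached page at @index and is willing
 * to allocate one.  Error handling is reduced to the bare minimum.
 *
 *	page = find_or_create_page(mapping, index, mapping_gfp_mask(mapping));
 *	if (!page)
 *		return -ENOMEM;
 *	// ... fill or modify the locked page ...
 *	unlock_page(page);
 *	put_page(page);
 */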

/**
 * grab_cache_page_nowait - returns locked page at given index in given cache
 * @mapping: target address_space
 * @index: the page index
 *
 * Same as grab_cache_page(), but do not wait if the page is unavailable.
 * This is intended for speculative data generators, where the data can
 * be regenerated if the page couldn't be grabbed.  This routine should
 * be safe to call while holding the lock for another page.
 *
 * Clear __GFP_FS when allocating the page to avoid recursion into the fs
 * and deadlock against the caller's locked page.
 */
static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
				pgoff_t index)
{
	return pagecache_get_page(mapping, index,
			FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT,
			mapping_gfp_mask(mapping));
}

/*
 * Given the page we found in the page cache, return the page corresponding
 * to this index in the file
 */
static inline struct page *find_subpage(struct page *head, pgoff_t index)
{
	/* HugeTLBfs wants the head page regardless */
	if (PageHuge(head))
		return head;

	return head + (index & (hpage_nr_pages(head) - 1));
}

struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
			  unsigned int nr_entries, struct page **entries,
			  pgoff_t *indices);
unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
			pgoff_t end, unsigned int nr_pages,
			struct page **pages);
static inline unsigned find_get_pages(struct address_space *mapping,
			pgoff_t *start, unsigned int nr_pages,
			struct page **pages)
{
	return find_get_pages_range(mapping, start, (pgoff_t)-1, nr_pages,
				    pages);
}
unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
			       unsigned int nr_pages, struct page **pages);
unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
			pgoff_t end, xa_mark_t tag, unsigned int nr_pages,
			struct page **pages);
static inline unsigned find_get_pages_tag(struct address_space *mapping,
			pgoff_t *index, xa_mark_t tag, unsigned int nr_pages,
			struct page **pages)
{
	return find_get_pages_range_tag(mapping, index, (pgoff_t)-1, tag,
					nr_pages, pages);
}
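
/*
 * Example (illustrative sketch, not part of the upstream header): walking
 * the dirty pages in a range with the tag-based lookup.  Real writeback
 * code usually goes through pagevec helpers instead; @start and @end are
 * assumed to be in scope.
 *
 *	struct page *pages[16];
 *	pgoff_t index = start;
 *	unsigned int i, nr;
 *
 *	while ((nr = find_get_pages_range_tag(mapping, &index, end,
 *					      PAGECACHE_TAG_DIRTY, 16, pages))) {
 *		for (i = 0; i < nr; i++) {
 *			// ... write back pages[i] ...
 *			put_page(pages[i]);
 *		}
 *	}
 */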

struct page *grab_cache_page_write_begin(struct address_space *mapping,
			pgoff_t index, unsigned flags);

/*
 * Returns locked page at given index in given cache, creating it if needed.
 */
static inline struct page *grab_cache_page(struct address_space *mapping,
								pgoff_t index)
{
	return find_or_create_page(mapping, index, mapping_gfp_mask(mapping));
}

extern struct page * read_cache_page(struct address_space *mapping,
				pgoff_t index, filler_t *filler, void *data);
extern struct page * read_cache_page_gfp(struct address_space *mapping,
				pgoff_t index, gfp_t gfp_mask);
extern int read_cache_pages(struct address_space *mapping,
		struct list_head *pages, filler_t *filler, void *data);

static inline struct page *read_mapping_page(struct address_space *mapping,
				pgoff_t index, void *data)
{
	return read_cache_page(mapping, index, NULL, data);
}
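
/*
 * Example (illustrative, not part of the upstream header): read_mapping_page()
 * returns an ERR_PTR() rather than NULL on failure, so callers check with
 * IS_ERR().  @mapping and @index are assumed to be in scope.
 *
 *	page = read_mapping_page(mapping, index, NULL);
 *	if (IS_ERR(page))
 *		return PTR_ERR(page);
 *	// ... the page is uptodate; use its contents ...
 *	put_page(page);
 */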

/*
 * Get the index of the page within the radix tree.
 * (TODO: remove once hugetlb pages have ->index in PAGE_SIZE units)
 */
static inline pgoff_t page_to_index(struct page *page)
{
	pgoff_t pgoff;

	if (likely(!PageTransTail(page)))
		return page->index;

	/*
	 * We don't initialize ->index for tail pages: calculate it based on
	 * the head page.
	 */
	pgoff = compound_head(page)->index;
	pgoff += page - compound_head(page);
	return pgoff;
}

/*
 * Get the page's offset into the file, in units of PAGE_SIZE.
 * (TODO: hugepages should have ->index in PAGE_SIZE units)
 */
static inline pgoff_t page_to_pgoff(struct page *page)
{
	if (unlikely(PageHeadHuge(page)))
		return page->index << compound_order(page);

	return page_to_index(page);
}

/*
 * Return byte-offset into filesystem object for page.
 */
static inline loff_t page_offset(struct page *page)
{
	return ((loff_t)page->index) << PAGE_SHIFT;
}

static inline loff_t page_file_offset(struct page *page)
{
	return ((loff_t)page_index(page)) << PAGE_SHIFT;
}

extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
				     unsigned long address);

static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
					unsigned long address)
{
	pgoff_t pgoff;
	if (unlikely(is_vm_hugetlb_page(vma)))
		return linear_hugepage_index(vma, address);
	pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
	pgoff += vma->vm_pgoff;
	return pgoff;
}
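
/*
 * Worked example (illustrative): with 4KiB pages, a VMA with
 * vm_start == 0x7f0000000000 and vm_pgoff == 16 maps file page 16 at its
 * start, so a fault at address 0x7f0000003000 gives
 * (0x3000 >> PAGE_SHIFT) + 16 == 3 + 16 == 19, i.e. the fault lands in the
 * twentieth page of the file.
 */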

extern void __lock_page(struct page *page);
extern int __lock_page_killable(struct page *page);
extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
				unsigned int flags);
extern void unlock_page(struct page *page);

/*
 * Return true if the page was successfully locked
 */
static inline int trylock_page(struct page *page)
{
	page = compound_head(page);
	return (likely(!test_and_set_bit_lock(PG_locked, &page->flags)));
}

/*
 * lock_page may only be called if we have the page's inode pinned.
 */
static inline void lock_page(struct page *page)
{
	might_sleep();
	if (!trylock_page(page))
		__lock_page(page);
}

/*
 * lock_page_killable is like lock_page but can be interrupted by fatal
 * signals.  It returns 0 if it locked the page and -EINTR if it was
 * killed while waiting.
 */
static inline int lock_page_killable(struct page *page)
{
	might_sleep();
	if (!trylock_page(page))
		return __lock_page_killable(page);
	return 0;
}
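
/*
 * Example (illustrative, not part of the upstream header): a sleepable path
 * that must back out cleanly if the task is fatally signalled while waiting
 * for the page lock.
 *
 *	err = lock_page_killable(page);
 *	if (err) {
 *		put_page(page);
 *		return err;		// -EINTR
 *	}
 *	// ... the page is locked here ...
 *	unlock_page(page);
 */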

/*
 * lock_page_or_retry - Lock the page, unless this would block and the
 * caller indicated that it can handle a retry.
 *
 * Return value and mmap_sem implications depend on flags; see
 * __lock_page_or_retry().
 */
static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
				     unsigned int flags)
{
	might_sleep();
	return trylock_page(page) || __lock_page_or_retry(page, mm, flags);
}

/*
 * This is exported only for wait_on_page_locked/wait_on_page_writeback, etc.,
 * and should not be used directly.
 */
extern void wait_on_page_bit(struct page *page, int bit_nr);
extern int wait_on_page_bit_killable(struct page *page, int bit_nr);

/*
 * Wait for a page to be unlocked.
 *
 * This must be called with the caller "holding" the page,
 * i.e. with an increased "page->count" so that the page won't
 * go away during the wait.
 */
static inline void wait_on_page_locked(struct page *page)
{
	if (PageLocked(page))
		wait_on_page_bit(compound_head(page), PG_locked);
}

static inline int wait_on_page_locked_killable(struct page *page)
{
	if (!PageLocked(page))
		return 0;
	return wait_on_page_bit_killable(compound_head(page), PG_locked);
}

extern void put_and_wait_on_page_locked(struct page *page);

void wait_on_page_writeback(struct page *page);
extern void end_page_writeback(struct page *page);
void wait_for_stable_page(struct page *page);

void page_endio(struct page *page, bool is_write, int err);

/*
 * Add an arbitrary waiter to a page's wait queue
 */
extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter);

/*
 * Fault in everything in the given userspace address range.
 */
static inline int fault_in_pages_writeable(char __user *uaddr, int size)
{
	char __user *end = uaddr + size - 1;

	if (unlikely(size == 0))
		return 0;

	if (unlikely(uaddr > end))
		return -EFAULT;
	/*
	 * Writing zeroes into userspace here is OK, because we know that if
	 * the zero gets there, we'll be overwriting it.
	 */
	do {
		if (unlikely(__put_user(0, uaddr) != 0))
			return -EFAULT;
		uaddr += PAGE_SIZE;
	} while (uaddr <= end);

	/* Check whether the range spilled into the next page. */
	if (((unsigned long)uaddr & PAGE_MASK) ==
			((unsigned long)end & PAGE_MASK))
		return __put_user(0, end);

	return 0;
}

static inline int fault_in_pages_readable(const char __user *uaddr, int size)
{
	volatile char c;
	const char __user *end = uaddr + size - 1;

	if (unlikely(size == 0))
		return 0;

	if (unlikely(uaddr > end))
		return -EFAULT;

	do {
		if (unlikely(__get_user(c, uaddr) != 0))
			return -EFAULT;
		uaddr += PAGE_SIZE;
	} while (uaddr <= end);

	/* Check whether the range spilled into the next page. */
	if (((unsigned long)uaddr & PAGE_MASK) ==
			((unsigned long)end & PAGE_MASK)) {
		return __get_user(c, end);
	}

	(void)c;
	return 0;
}
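
/*
 * Example (illustrative sketch, not part of the upstream header): buffered
 * write paths typically fault the source buffer in up front, before taking
 * page locks, so that the later copy from userspace (done with page faults
 * disabled) is unlikely to fail.  @buf and @bytes are assumed to be in scope.
 *
 *	if (unlikely(fault_in_pages_readable(buf, bytes)))
 *		return -EFAULT;
 *	// ... lock the pagecache page, then copy with pagefaults disabled,
 *	//     retrying from the top if the copy still comes up short ...
 */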

int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
				pgoff_t index, gfp_t gfp_mask);
int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
				pgoff_t index, gfp_t gfp_mask);
extern void delete_from_page_cache(struct page *page);
extern void __delete_from_page_cache(struct page *page, void *shadow);
int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask);
void delete_from_page_cache_batch(struct address_space *mapping,
				  struct pagevec *pvec);

/*
 * Like add_to_page_cache_locked, but used to add newly allocated pages:
 * the page is new, so we can just run __SetPageLocked() against it.
 */
static inline int add_to_page_cache(struct page *page,
		struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask)
{
	int error;

	__SetPageLocked(page);
	error = add_to_page_cache_locked(page, mapping, offset, gfp_mask);
	if (unlikely(error))
		__ClearPageLocked(page);
	return error;
}
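
/*
 * Example (illustrative sketch, not part of the upstream header): adding a
 * freshly allocated page to both the pagecache and the LRU, roughly as
 * readahead-style code does.  @mapping and @index are assumed to be in scope.
 *
 *	gfp_t gfp = readahead_gfp_mask(mapping);
 *	struct page *page = __page_cache_alloc(gfp);
 *
 *	if (!page)
 *		return -ENOMEM;
 *	if (add_to_page_cache_lru(page, mapping, index, gfp)) {
 *		put_page(page);		// already present, or out of memory
 *		return 0;
 *	}
 *	// the page is locked and in the cache; start I/O on it
 */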

static inline unsigned long dir_pages(struct inode *inode)
{
	return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >>
			       PAGE_SHIFT;
}

/**
 * page_mkwrite_check_truncate - check if page was truncated
 * @page: the page to check
 * @inode: the inode to check the page against
 *
 * Returns the number of bytes in the page up to EOF,
 * or -EFAULT if the page was truncated.
 */
static inline int page_mkwrite_check_truncate(struct page *page,
					      struct inode *inode)
{
	loff_t size = i_size_read(inode);
	pgoff_t index = size >> PAGE_SHIFT;
	int offset = offset_in_page(size);

	if (page->mapping != inode->i_mapping)
		return -EFAULT;

	/* page is wholly inside EOF */
	if (page->index < index)
		return PAGE_SIZE;
	/* page is wholly past EOF */
	if (page->index > index || !offset)
		return -EFAULT;
	/* page is partially inside EOF */
	return offset;
}
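
/*
 * Example (illustrative sketch, not part of the upstream header): a
 * ->page_mkwrite() handler would typically lock the page and then use this
 * helper to detect a racing truncate before dirtying the page.
 *
 *	lock_page(page);
 *	ret = page_mkwrite_check_truncate(page, inode);
 *	if (ret < 0) {
 *		unlock_page(page);
 *		return VM_FAULT_NOPAGE;
 *	}
 *	// ret bytes of the page lie within i_size; dirty the page and
 *	// return VM_FAULT_LOCKED
 */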

#endif /* _LINUX_PAGEMAP_H */