linux/include/linux/pagemap.h
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0 */
   2#ifndef _LINUX_PAGEMAP_H
   3#define _LINUX_PAGEMAP_H
   4
   5/*
   6 * Copyright 1995 Linus Torvalds
   7 */
   8#include <linux/mm.h>
   9#include <linux/fs.h>
  10#include <linux/list.h>
  11#include <linux/highmem.h>
  12#include <linux/compiler.h>
  13#include <linux/uaccess.h>
  14#include <linux/gfp.h>
  15#include <linux/bitops.h>
  16#include <linux/hardirq.h> /* for in_interrupt() */
  17#include <linux/hugetlb_inline.h>
  18
struct folio_batch;

/*
 * Only the prototype is provided by this header.  The range is given as
 * a pair of pgoff_t page indices; invalidate_remote_inode() in this file
 * calls it with start 0 and end -1.
 */
unsigned long invalidate_mapping_pages(struct address_space *mapping,
					pgoff_t start, pgoff_t end);
  23
  24static inline void invalidate_remote_inode(struct inode *inode)
  25{
  26        if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
  27            S_ISLNK(inode->i_mode))
  28                invalidate_mapping_pages(inode->i_mapping, 0, -1);
  29}
/*
 * Further invalidation entry points; only their prototypes live here.
 * Judging by the signatures they take, respectively: a whole mapping, a
 * pgoff_t index range, a kiocb plus a byte count (two variants), and a
 * loff_t byte range with a @nowait switch.
 */
int invalidate_inode_pages2(struct address_space *mapping);
int invalidate_inode_pages2_range(struct address_space *mapping,
		pgoff_t start, pgoff_t end);
int kiocb_invalidate_pages(struct kiocb *iocb, size_t count);
void kiocb_invalidate_post_direct_write(struct kiocb *iocb, size_t count);
int filemap_invalidate_pages(struct address_space *mapping,
			     loff_t pos, loff_t end, bool nowait);
  37
/*
 * Writeback and wait entry points; only their prototypes live here.
 * Ranges are expressed as loff_t values.  The inline wrapper
 * filemap_fdatawait() in this header calls filemap_fdatawait_range()
 * with the range 0 .. LLONG_MAX.
 */
int write_inode_now(struct inode *, int sync);
int filemap_fdatawrite(struct address_space *);
int filemap_flush(struct address_space *);
int filemap_fdatawait_keep_errors(struct address_space *mapping);
int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend);
int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
		loff_t start_byte, loff_t end_byte);
int filemap_invalidate_inode(struct inode *inode, bool flush,
			     loff_t start, loff_t end);
  47
  48static inline int filemap_fdatawait(struct address_space *mapping)
  49{
  50        return filemap_fdatawait_range(mapping, 0, LLONG_MAX);
  51}
  52
/*
 * Range-based query, write and error-tracking entry points; only their
 * prototypes live here.  Within this header, filemap_write_and_wait()
 * wraps filemap_write_and_wait_range(), and filemap_set_wb_err() and
 * mapping_set_error() both call __filemap_set_wb_err().
 */
bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend);
int filemap_write_and_wait_range(struct address_space *mapping,
		loff_t lstart, loff_t lend);
int __filemap_fdatawrite_range(struct address_space *mapping,
		loff_t start, loff_t end, int sync_mode);
int filemap_fdatawrite_range(struct address_space *mapping,
		loff_t start, loff_t end);
int filemap_check_errors(struct address_space *mapping);
void __filemap_set_wb_err(struct address_space *mapping, int err);
int filemap_fdatawrite_wbc(struct address_space *mapping,
			   struct writeback_control *wbc);
int kiocb_write_and_wait(struct kiocb *iocb, size_t count);
  65
  66static inline int filemap_write_and_wait(struct address_space *mapping)
  67{
  68        return filemap_write_and_wait_range(mapping, 0, LLONG_MAX);
  69}
  70
  71/**
  72 * filemap_set_wb_err - set a writeback error on an address_space
  73 * @mapping: mapping in which to set writeback error
  74 * @err: error to be set in mapping
  75 *
  76 * When writeback fails in some way, we must record that error so that
  77 * userspace can be informed when fsync and the like are called.  We endeavor
  78 * to report errors on any file that was open at the time of the error.  Some
  79 * internal callers also need to know when writeback errors have occurred.
  80 *
  81 * When a writeback error occurs, most filesystems will want to call
  82 * filemap_set_wb_err to record the error in the mapping so that it will be
  83 * automatically reported whenever fsync is called on the file.
  84 */
  85static inline void filemap_set_wb_err(struct address_space *mapping, int err)
  86{
  87        /* Fastpath for common case of no error */
  88        if (unlikely(err))
  89                __filemap_set_wb_err(mapping, err);
  90}
  91
  92/**
  93 * filemap_check_wb_err - has an error occurred since the mark was sampled?
  94 * @mapping: mapping to check for writeback errors
  95 * @since: previously-sampled errseq_t
  96 *
  97 * Grab the errseq_t value from the mapping, and see if it has changed "since"
  98 * the given value was sampled.
  99 *
 100 * If it has then report the latest error set, otherwise return 0.
 101 */
 102static inline int filemap_check_wb_err(struct address_space *mapping,
 103                                        errseq_t since)
 104{
 105        return errseq_check(&mapping->wb_err, since);
 106}
 107
 108/**
 109 * filemap_sample_wb_err - sample the current errseq_t to test for later errors
 110 * @mapping: mapping to be sampled
 111 *
 112 * Writeback errors are always reported relative to a particular sample point
 113 * in the past. This function provides those sample points.
 114 */
 115static inline errseq_t filemap_sample_wb_err(struct address_space *mapping)
 116{
 117        return errseq_sample(&mapping->wb_err);
 118}
 119
 120/**
 121 * file_sample_sb_err - sample the current errseq_t to test for later errors
 122 * @file: file pointer to be sampled
 123 *
 124 * Grab the most current superblock-level errseq_t value for the given
 125 * struct file.
 126 */
 127static inline errseq_t file_sample_sb_err(struct file *file)
 128{
 129        return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err);
 130}
 131
 132/*
 133 * Flush file data before changing attributes.  Caller must hold any locks
 134 * required to prevent further writes to this file until we're done setting
 135 * flags.
 136 */
 137static inline int inode_drain_writes(struct inode *inode)
 138{
 139        inode_dio_wait(inode);
 140        return filemap_write_and_wait(inode->i_mapping);
 141}
 142
 143static inline bool mapping_empty(struct address_space *mapping)
 144{
 145        return xa_empty(&mapping->i_pages);
 146}
 147
 148/*
 149 * mapping_shrinkable - test if page cache state allows inode reclaim
 150 * @mapping: the page cache mapping
 151 *
 152 * This checks the mapping's cache state for the pupose of inode
 153 * reclaim and LRU management.
 154 *
 155 * The caller is expected to hold the i_lock, but is not required to
 156 * hold the i_pages lock, which usually protects cache state. That's
 157 * because the i_lock and the list_lru lock that protect the inode and
 158 * its LRU state don't nest inside the irq-safe i_pages lock.
 159 *
 160 * Cache deletions are performed under the i_lock, which ensures that
 161 * when an inode goes empty, it will reliably get queued on the LRU.
 162 *
 163 * Cache additions do not acquire the i_lock and may race with this
 164 * check, in which case we'll report the inode as shrinkable when it
 165 * has cache pages. This is okay: the shrinker also checks the
 166 * refcount and the referenced bit, which will be elevated or set in
 167 * the process of adding new cache pages to an inode.
 168 */
 169static inline bool mapping_shrinkable(struct address_space *mapping)
 170{
 171        void *head;
 172
 173        /*
 174         * On highmem systems, there could be lowmem pressure from the
 175         * inodes before there is highmem pressure from the page
 176         * cache. Make inodes shrinkable regardless of cache state.
 177         */
 178        if (IS_ENABLED(CONFIG_HIGHMEM))
 179                return true;
 180
 181        /* Cache completely empty? Shrink away. */
 182        head = rcu_access_pointer(mapping->i_pages.xa_head);
 183        if (!head)
 184                return true;
 185
 186        /*
 187         * The xarray stores single offset-0 entries directly in the
 188         * head pointer, which allows non-resident page cache entries
 189         * to escape the shadow shrinker's list of xarray nodes. The
 190         * inode shrinker needs to pick them up under memory pressure.
 191         */
 192        if (!xa_is_node(head) && xa_is_value(head))
 193                return true;
 194
 195        return false;
 196}
 197
 198/*
 199 * Bits in mapping->flags.
 200 */
 201enum mapping_flags {
 202        AS_EIO          = 0,    /* IO error on async write */
 203        AS_ENOSPC       = 1,    /* ENOSPC on async write */
 204        AS_MM_ALL_LOCKS = 2,    /* under mm_take_all_locks() */
 205        AS_UNEVICTABLE  = 3,    /* e.g., ramdisk, SHM_LOCK */
 206        AS_EXITING      = 4,    /* final truncate in progress */
 207        /* writeback related tags are not used */
 208        AS_NO_WRITEBACK_TAGS = 5,
 209        AS_RELEASE_ALWAYS = 6,  /* Call ->release_folio(), even if no private data */
 210        AS_STABLE_WRITES = 7,   /* must wait for writeback before modifying
 211                                   folio contents */
 212        AS_INACCESSIBLE = 8,    /* Do not attempt direct R/W access to the mapping */
 213        AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM = 9,
 214        /* Bits 16-25 are used for FOLIO_ORDER */
 215        AS_FOLIO_ORDER_BITS = 5,
 216        AS_FOLIO_ORDER_MIN = 16,
 217        AS_FOLIO_ORDER_MAX = AS_FOLIO_ORDER_MIN + AS_FOLIO_ORDER_BITS,
 218};
 219
/*
 * Masks for the folio order fields in mapping->flags.
 * AS_FOLIO_ORDER_BITS_MASK is an unshifted field-wide mask; the MIN and
 * MAX masks are that value shifted to the respective field offset, and
 * AS_FOLIO_ORDER_MASK covers both fields.  All are built from 1u and so
 * have type unsigned int.
 */
#define AS_FOLIO_ORDER_BITS_MASK ((1u << AS_FOLIO_ORDER_BITS) - 1)
#define AS_FOLIO_ORDER_MIN_MASK (AS_FOLIO_ORDER_BITS_MASK << AS_FOLIO_ORDER_MIN)
#define AS_FOLIO_ORDER_MAX_MASK (AS_FOLIO_ORDER_BITS_MASK << AS_FOLIO_ORDER_MAX)
#define AS_FOLIO_ORDER_MASK (AS_FOLIO_ORDER_MIN_MASK | AS_FOLIO_ORDER_MAX_MASK)
 224
 225/**
 226 * mapping_set_error - record a writeback error in the address_space
 227 * @mapping: the mapping in which an error should be set
 228 * @error: the error to set in the mapping
 229 *
 230 * When writeback fails in some way, we must record that error so that
 231 * userspace can be informed when fsync and the like are called.  We endeavor
 232 * to report errors on any file that was open at the time of the error.  Some
 233 * internal callers also need to know when writeback errors have occurred.
 234 *
 235 * When a writeback error occurs, most filesystems will want to call
 236 * mapping_set_error to record the error in the mapping so that it can be
 237 * reported when the application calls fsync(2).
 238 */
 239static inline void mapping_set_error(struct address_space *mapping, int error)
 240{
 241        if (likely(!error))
 242                return;
 243
 244        /* Record in wb_err for checkers using errseq_t based tracking */
 245        __filemap_set_wb_err(mapping, error);
 246
 247        /* Record it in superblock */
 248        if (mapping->host)
 249                errseq_set(&mapping->host->i_sb->s_wb_err, error);
 250
 251        /* Record it in flags for now, for legacy callers */
 252        if (error == -ENOSPC)
 253                set_bit(AS_ENOSPC, &mapping->flags);
 254        else
 255                set_bit(AS_EIO, &mapping->flags);
 256}
 257
 258static inline void mapping_set_unevictable(struct address_space *mapping)
 259{
 260        set_bit(AS_UNEVICTABLE, &mapping->flags);
 261}
 262
 263static inline void mapping_clear_unevictable(struct address_space *mapping)
 264{
 265        clear_bit(AS_UNEVICTABLE, &mapping->flags);
 266}
 267
 268static inline bool mapping_unevictable(struct address_space *mapping)
 269{
 270        return mapping && test_bit(AS_UNEVICTABLE, &mapping->flags);
 271}
 272
 273static inline void mapping_set_exiting(struct address_space *mapping)
 274{
 275        set_bit(AS_EXITING, &mapping->flags);
 276}
 277
 278static inline int mapping_exiting(struct address_space *mapping)
 279{
 280        return test_bit(AS_EXITING, &mapping->flags);
 281}
 282
 283static inline void mapping_set_no_writeback_tags(struct address_space *mapping)
 284{
 285        set_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags);
 286}
 287
 288static inline int mapping_use_writeback_tags(struct address_space *mapping)
 289{
 290        return !test_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags);
 291}
 292
 293static inline bool mapping_release_always(const struct address_space *mapping)
 294{
 295        return test_bit(AS_RELEASE_ALWAYS, &mapping->flags);
 296}
 297
 298static inline void mapping_set_release_always(struct address_space *mapping)
 299{
 300        set_bit(AS_RELEASE_ALWAYS, &mapping->flags);
 301}
 302
 303static inline void mapping_clear_release_always(struct address_space *mapping)
 304{
 305        clear_bit(AS_RELEASE_ALWAYS, &mapping->flags);
 306}
 307
 308static inline bool mapping_stable_writes(const struct address_space *mapping)
 309{
 310        return test_bit(AS_STABLE_WRITES, &mapping->flags);
 311}
 312
 313static inline void mapping_set_stable_writes(struct address_space *mapping)
 314{
 315        set_bit(AS_STABLE_WRITES, &mapping->flags);
 316}
 317
 318static inline void mapping_clear_stable_writes(struct address_space *mapping)
 319{
 320        clear_bit(AS_STABLE_WRITES, &mapping->flags);
 321}
 322
 323static inline void mapping_set_inaccessible(struct address_space *mapping)
 324{
 325        /*
 326         * It's expected inaccessible mappings are also unevictable. Compaction
 327         * migrate scanner (isolate_migratepages_block()) relies on this to
 328         * reduce page locking.
 329         */
 330        set_bit(AS_UNEVICTABLE, &mapping->flags);
 331        set_bit(AS_INACCESSIBLE, &mapping->flags);
 332}
 333
 334static inline bool mapping_inaccessible(struct address_space *mapping)
 335{
 336        return test_bit(AS_INACCESSIBLE, &mapping->flags);
 337}
 338
 339static inline void mapping_set_writeback_may_deadlock_on_reclaim(struct address_space *mapping)
 340{
 341        set_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags);
 342}
 343
 344static inline bool mapping_writeback_may_deadlock_on_reclaim(struct address_space *mapping)
 345{
 346        return test_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags);
 347}
 348
 349static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
 350{
 351        return mapping->gfp_mask;
 352}
 353
 354/* Restricts the given gfp_mask to what the mapping allows. */
 355static inline gfp_t mapping_gfp_constraint(struct address_space *mapping,
 356                gfp_t gfp_mask)
 357{
 358        return mapping_gfp_mask(mapping) & gfp_mask;
 359}
 360
 361/*
 362 * This is non-atomic.  Only to be used before the mapping is activated.
 363 * Probably needs a barrier...
 364 */
 365static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
 366{
 367        m->gfp_mask = mask;
 368}
 369
/*
 * There are some parts of the kernel which assume that PMD entries
 * are exactly HPAGE_PMD_ORDER.  Those should be fixed, but until then,
 * limit the maximum allocation order to PMD size.  I'm not aware of any
 * assumptions about maximum order if THP are disabled, but 8 seems like
 * a good order (that's 1MB if you're using 4kB pages)
 *
 * This is only the preferred limit; MAX_PAGECACHE_ORDER additionally
 * caps it at MAX_XAS_ORDER.
 */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define PREFERRED_MAX_PAGECACHE_ORDER	HPAGE_PMD_ORDER
#else
#define PREFERRED_MAX_PAGECACHE_ORDER	8
#endif
 382
/*
 * xas_split_alloc() does not support arbitrary orders. This implies no
 * 512MB THP on ARM64 with 64KB base page size.
 *
 * MAX_XAS_ORDER is derived from XA_CHUNK_SHIFT; MAX_PAGECACHE_ORDER is
 * the smaller of it and PREFERRED_MAX_PAGECACHE_ORDER.
 */
#define MAX_XAS_ORDER		(XA_CHUNK_SHIFT * 2 - 1)
#define MAX_PAGECACHE_ORDER	min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER)
 389
 390/*
 391 * mapping_max_folio_size_supported() - Check the max folio size supported
 392 *
 393 * The filesystem should call this function at mount time if there is a
 394 * requirement on the folio mapping size in the page cache.
 395 */
 396static inline size_t mapping_max_folio_size_supported(void)
 397{
 398        if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
 399                return 1U << (PAGE_SHIFT + MAX_PAGECACHE_ORDER);
 400        return PAGE_SIZE;
 401}
 402
 403/*
 404 * mapping_set_folio_order_range() - Set the orders supported by a file.
 405 * @mapping: The address space of the file.
 406 * @min: Minimum folio order (between 0-MAX_PAGECACHE_ORDER inclusive).
 407 * @max: Maximum folio order (between @min-MAX_PAGECACHE_ORDER inclusive).
 408 *
 409 * The filesystem should call this function in its inode constructor to
 410 * indicate which base size (min) and maximum size (max) of folio the VFS
 411 * can use to cache the contents of the file.  This should only be used
 412 * if the filesystem needs special handling of folio sizes (ie there is
 413 * something the core cannot know).
 414 * Do not tune it based on, eg, i_size.
 415 *
 416 * Context: This should not be called while the inode is active as it
 417 * is non-atomic.
 418 */
 419static inline void mapping_set_folio_order_range(struct address_space *mapping,
 420                                                 unsigned int min,
 421                                                 unsigned int max)
 422{
 423        if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
 424                return;
 425
 426        if (min > MAX_PAGECACHE_ORDER)
 427                min = MAX_PAGECACHE_ORDER;
 428
 429        if (max > MAX_PAGECACHE_ORDER)
 430                max = MAX_PAGECACHE_ORDER;
 431
 432        if (max < min)
 433                max = min;
 434
 435        mapping->flags = (mapping->flags & ~AS_FOLIO_ORDER_MASK) |
 436                (min << AS_FOLIO_ORDER_MIN) | (max << AS_FOLIO_ORDER_MAX);
 437}
 438
 439static inline void mapping_set_folio_min_order(struct address_space *mapping,
 440                                               unsigned int min)
 441{
 442        mapping_set_folio_order_range(mapping, min, MAX_PAGECACHE_ORDER);
 443}
 444
 445/**
 446 * mapping_set_large_folios() - Indicate the file supports large folios.
 447 * @mapping: The address space of the file.
 448 *
 449 * The filesystem should call this function in its inode constructor to
 450 * indicate that the VFS can use large folios to cache the contents of
 451 * the file.
 452 *
 453 * Context: This should not be called while the inode is active as it
 454 * is non-atomic.
 455 */
 456static inline void mapping_set_large_folios(struct address_space *mapping)
 457{
 458        mapping_set_folio_order_range(mapping, 0, MAX_PAGECACHE_ORDER);
 459}
 460
 461static inline unsigned int
 462mapping_max_folio_order(const struct address_space *mapping)
 463{
 464        if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
 465                return 0;
 466        return (mapping->flags & AS_FOLIO_ORDER_MAX_MASK) >> AS_FOLIO_ORDER_MAX;
 467}
 468
 469static inline unsigned int
 470mapping_min_folio_order(const struct address_space *mapping)
 471{
 472        if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
 473                return 0;
 474        return (mapping->flags & AS_FOLIO_ORDER_MIN_MASK) >> AS_FOLIO_ORDER_MIN;
 475}
 476
 477static inline unsigned long
 478mapping_min_folio_nrpages(struct address_space *mapping)
 479{
 480        return 1UL << mapping_min_folio_order(mapping);
 481}
 482
 483/**
 484 * mapping_align_index() - Align index for this mapping.
 485 * @mapping: The address_space.
 486 * @index: The page index.
 487 *
 488 * The index of a folio must be naturally aligned.  If you are adding a
 489 * new folio to the page cache and need to know what index to give it,
 490 * call this function.
 491 */
 492static inline pgoff_t mapping_align_index(struct address_space *mapping,
 493                                          pgoff_t index)
 494{
 495        return round_down(index, mapping_min_folio_nrpages(mapping));
 496}
 497
 498/*
 499 * Large folio support currently depends on THP.  These dependencies are
 500 * being worked on but are not yet fixed.
 501 */
 502static inline bool mapping_large_folio_support(struct address_space *mapping)
 503{
 504        /* AS_FOLIO_ORDER is only reasonable for pagecache folios */
 505        VM_WARN_ONCE((unsigned long)mapping & FOLIO_MAPPING_ANON,
 506                        "Anonymous mapping always supports large folio");
 507
 508        return mapping_max_folio_order(mapping) > 0;
 509}
 510
 511/* Return the maximum folio size for this pagecache mapping, in bytes. */
 512static inline size_t mapping_max_folio_size(const struct address_space *mapping)
 513{
 514        return PAGE_SIZE << mapping_max_folio_order(mapping);
 515}
 516
 517static inline int filemap_nr_thps(struct address_space *mapping)
 518{
 519#ifdef CONFIG_READ_ONLY_THP_FOR_FS
 520        return atomic_read(&mapping->nr_thps);
 521#else
 522        return 0;
 523#endif
 524}
 525
 526static inline void filemap_nr_thps_inc(struct address_space *mapping)
 527{
 528#ifdef CONFIG_READ_ONLY_THP_FOR_FS
 529        if (!mapping_large_folio_support(mapping))
 530                atomic_inc(&mapping->nr_thps);
 531#else
 532        WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0);
 533#endif
 534}
 535
 536static inline void filemap_nr_thps_dec(struct address_space *mapping)
 537{
 538#ifdef CONFIG_READ_ONLY_THP_FOR_FS
 539        if (!mapping_large_folio_support(mapping))
 540                atomic_dec(&mapping->nr_thps);
 541#else
 542        WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0);
 543#endif
 544}
 545
/* Prototype only; folio_flush_mapping() in this header calls it. */
struct address_space *folio_mapping(struct folio *);
 547
 548/**
 549 * folio_flush_mapping - Find the file mapping this folio belongs to.
 550 * @folio: The folio.
 551 *
 552 * For folios which are in the page cache, return the mapping that this
 553 * page belongs to.  Anonymous folios return NULL, even if they're in
 554 * the swap cache.  Other kinds of folio also return NULL.
 555 *
 556 * This is ONLY used by architecture cache flushing code.  If you aren't
 557 * writing cache flushing code, you want either folio_mapping() or
 558 * folio_file_mapping().
 559 */
 560static inline struct address_space *folio_flush_mapping(struct folio *folio)
 561{
 562        if (unlikely(folio_test_swapcache(folio)))
 563                return NULL;
 564
 565        return folio_mapping(folio);
 566}
 567
 568/**
 569 * folio_inode - Get the host inode for this folio.
 570 * @folio: The folio.
 571 *
 572 * For folios which are in the page cache, return the inode that this folio
 573 * belongs to.
 574 *
 575 * Do not call this for folios which aren't in the page cache.
 576 */
 577static inline struct inode *folio_inode(struct folio *folio)
 578{
 579        return folio->mapping->host;
 580}
 581
 582/**
 583 * folio_attach_private - Attach private data to a folio.
 584 * @folio: Folio to attach data to.
 585 * @data: Data to attach to folio.
 586 *
 587 * Attaching private data to a folio increments the page's reference count.
 588 * The data must be detached before the folio will be freed.
 589 */
 590static inline void folio_attach_private(struct folio *folio, void *data)
 591{
 592        folio_get(folio);
 593        folio->private = data;
 594        folio_set_private(folio);
 595}
 596
 597/**
 598 * folio_change_private - Change private data on a folio.
 599 * @folio: Folio to change the data on.
 600 * @data: Data to set on the folio.
 601 *
 602 * Change the private data attached to a folio and return the old
 603 * data.  The page must previously have had data attached and the data
 604 * must be detached before the folio will be freed.
 605 *
 606 * Return: Data that was previously attached to the folio.
 607 */
 608static inline void *folio_change_private(struct folio *folio, void *data)
 609{
 610        void *old = folio_get_private(folio);
 611
 612        folio->private = data;
 613        return old;
 614}
 615
 616/**
 617 * folio_detach_private - Detach private data from a folio.
 618 * @folio: Folio to detach data from.
 619 *
 620 * Removes the data that was previously attached to the folio and decrements
 621 * the refcount on the page.
 622 *
 623 * Return: Data that was attached to the folio.
 624 */
 625static inline void *folio_detach_private(struct folio *folio)
 626{
 627        void *data = folio_get_private(folio);
 628
 629        if (!folio_test_private(folio))
 630                return NULL;
 631        folio_clear_private(folio);
 632        folio->private = NULL;
 633        folio_put(folio);
 634
 635        return data;
 636}
 637
 638static inline void attach_page_private(struct page *page, void *data)
 639{
 640        folio_attach_private(page_folio(page), data);
 641}
 642
 643static inline void *detach_page_private(struct page *page)
 644{
 645        return folio_detach_private(page_folio(page));
 646}
 647
 648#ifdef CONFIG_NUMA
 649struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order);
 650#else
 651static inline struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
 652{
 653        return folio_alloc_noprof(gfp, order);
 654}
 655#endif
 656
/*
 * Wraps filemap_alloc_folio_noprof() in alloc_hooks(); all arguments
 * are passed through unchanged.
 */
#define filemap_alloc_folio(...)				\
	alloc_hooks(filemap_alloc_folio_noprof(__VA_ARGS__))
 659
 660static inline struct page *__page_cache_alloc(gfp_t gfp)
 661{
 662        return &filemap_alloc_folio(gfp, 0)->page;
 663}
 664
 665static inline gfp_t readahead_gfp_mask(struct address_space *x)
 666{
 667        return mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN;
 668}
 669
/* Function type taking a file and a folio and returning an int. */
typedef int filler_t(struct file *, struct folio *);

/*
 * Prototypes only.  Both take a starting @index and a @max_scan limit
 * and return a pgoff_t.
 */
pgoff_t page_cache_next_miss(struct address_space *mapping,
			     pgoff_t index, unsigned long max_scan);
pgoff_t page_cache_prev_miss(struct address_space *mapping,
			     pgoff_t index, unsigned long max_scan);
 676
 677/**
 678 * typedef fgf_t - Flags for getting folios from the page cache.
 679 *
 680 * Most users of the page cache will not need to use these flags;
 681 * there are convenience functions such as filemap_get_folio() and
 682 * filemap_lock_folio().  For users which need more control over exactly
 683 * what is done with the folios, these flags to __filemap_get_folio()
 684 * are available.
 685 *
 686 * * %FGP_ACCESSED - The folio will be marked accessed.
 687 * * %FGP_LOCK - The folio is returned locked.
 688 * * %FGP_CREAT - If no folio is present then a new folio is allocated,
 689 *   added to the page cache and the VM's LRU list.  The folio is
 690 *   returned locked.
 691 * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the
 692 *   folio is already in cache.  If the folio was allocated, unlock it
 693 *   before returning so the caller can do the same dance.
 694 * * %FGP_WRITE - The folio will be written to by the caller.
 695 * * %FGP_NOFS - __GFP_FS will get cleared in gfp.
 696 * * %FGP_NOWAIT - Don't block on the folio lock.
 697 * * %FGP_STABLE - Wait for the folio to be stable (finished writeback)
 698 * * %FGP_DONTCACHE - Uncached buffered IO
 699 * * %FGP_WRITEBEGIN - The flags to use in a filesystem write_begin()
 700 *   implementation.
 701 */
 702typedef unsigned int __bitwise fgf_t;
 703
 704#define FGP_ACCESSED            ((__force fgf_t)0x00000001)
 705#define FGP_LOCK                ((__force fgf_t)0x00000002)
 706#define FGP_CREAT               ((__force fgf_t)0x00000004)
 707#define FGP_WRITE               ((__force fgf_t)0x00000008)
 708#define FGP_NOFS                ((__force fgf_t)0x00000010)
 709#define FGP_NOWAIT              ((__force fgf_t)0x00000020)
 710#define FGP_FOR_MMAP            ((__force fgf_t)0x00000040)
 711#define FGP_STABLE              ((__force fgf_t)0x00000080)
 712#define FGP_DONTCACHE           ((__force fgf_t)0x00000100)
 713#define FGF_GET_ORDER(fgf)      (((__force unsigned)fgf) >> 26) /* top 6 bits */
 714
 715#define FGP_WRITEBEGIN          (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE)
 716
 717static inline unsigned int filemap_get_order(size_t size)
 718{
 719        unsigned int shift = ilog2(size);
 720
 721        if (shift <= PAGE_SHIFT)
 722                return 0;
 723
 724        return shift - PAGE_SHIFT;
 725}
 726
 727/**
 728 * fgf_set_order - Encode a length in the fgf_t flags.
 729 * @size: The suggested size of the folio to create.
 730 *
 731 * The caller of __filemap_get_folio() can use this to suggest a preferred
 732 * size for the folio that is created.  If there is already a folio at
 733 * the index, it will be returned, no matter what its size.  If a folio
 734 * is freshly created, it may be of a different size than requested
 735 * due to alignment constraints, memory pressure, or the presence of
 736 * other folios at nearby indices.
 737 */
 738static inline fgf_t fgf_set_order(size_t size)
 739{
 740        unsigned int order = filemap_get_order(size);
 741
 742        if (!order)
 743                return 0;
 744        return (__force fgf_t)(order << 26);
 745}
 746
/*
 * Page cache lookup primitives; only their prototypes live here.  The
 * inline helpers further down in this header are thin wrappers that
 * call __filemap_get_folio() or pagecache_get_page() with fixed
 * fgf_t flags and gfp arguments.
 */
void *filemap_get_entry(struct address_space *mapping, pgoff_t index);
struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
		fgf_t fgp_flags, gfp_t gfp);
struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
		fgf_t fgp_flags, gfp_t gfp);
 752
 753/**
 754 * write_begin_get_folio - Get folio for write_begin with flags.
 755 * @iocb: The kiocb passed from write_begin (may be NULL).
 756 * @mapping: The address space to search.
 757 * @index: The page cache index.
 758 * @len: Length of data being written.
 759 *
 760 * This is a helper for filesystem write_begin() implementations.
 761 * It wraps __filemap_get_folio(), setting appropriate flags in
 762 * the write begin context.
 763 *
 764 * Return: A folio or an ERR_PTR.
 765 */
 766static inline struct folio *write_begin_get_folio(const struct kiocb *iocb,
 767                  struct address_space *mapping, pgoff_t index, size_t len)
 768{
 769        fgf_t fgp_flags = FGP_WRITEBEGIN;
 770
 771        fgp_flags |= fgf_set_order(len);
 772
 773        if (iocb && iocb->ki_flags & IOCB_DONTCACHE)
 774                fgp_flags |= FGP_DONTCACHE;
 775
 776        return __filemap_get_folio(mapping, index, fgp_flags,
 777                                   mapping_gfp_mask(mapping));
 778}
 779
 780/**
 781 * filemap_get_folio - Find and get a folio.
 782 * @mapping: The address_space to search.
 783 * @index: The page index.
 784 *
 785 * Looks up the page cache entry at @mapping & @index.  If a folio is
 786 * present, it is returned with an increased refcount.
 787 *
 788 * Return: A folio or ERR_PTR(-ENOENT) if there is no folio in the cache for
 789 * this index.  Will not return a shadow, swap or DAX entry.
 790 */
 791static inline struct folio *filemap_get_folio(struct address_space *mapping,
 792                                        pgoff_t index)
 793{
 794        return __filemap_get_folio(mapping, index, 0, 0);
 795}
 796
 797/**
 798 * filemap_lock_folio - Find and lock a folio.
 799 * @mapping: The address_space to search.
 800 * @index: The page index.
 801 *
 802 * Looks up the page cache entry at @mapping & @index.  If a folio is
 803 * present, it is returned locked with an increased refcount.
 804 *
 805 * Context: May sleep.
 806 * Return: A folio or ERR_PTR(-ENOENT) if there is no folio in the cache for
 807 * this index.  Will not return a shadow, swap or DAX entry.
 808 */
 809static inline struct folio *filemap_lock_folio(struct address_space *mapping,
 810                                        pgoff_t index)
 811{
 812        return __filemap_get_folio(mapping, index, FGP_LOCK, 0);
 813}
 814
 815/**
 816 * filemap_grab_folio - grab a folio from the page cache
 817 * @mapping: The address space to search
 818 * @index: The page index
 819 *
 820 * Looks up the page cache entry at @mapping & @index. If no folio is found,
 821 * a new folio is created. The folio is locked, marked as accessed, and
 822 * returned.
 823 *
 824 * Return: A found or created folio. ERR_PTR(-ENOMEM) if no folio is found
 825 * and failed to create a folio.
 826 */
 827static inline struct folio *filemap_grab_folio(struct address_space *mapping,
 828                                        pgoff_t index)
 829{
 830        return __filemap_get_folio(mapping, index,
 831                        FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
 832                        mapping_gfp_mask(mapping));
 833}
 834
 835/**
 836 * find_get_page - find and get a page reference
 837 * @mapping: the address_space to search
 838 * @offset: the page index
 839 *
 840 * Looks up the page cache slot at @mapping & @offset.  If there is a
 841 * page cache page, it is returned with an increased refcount.
 842 *
 843 * Otherwise, %NULL is returned.
 844 */
 845static inline struct page *find_get_page(struct address_space *mapping,
 846                                        pgoff_t offset)
 847{
 848        return pagecache_get_page(mapping, offset, 0, 0);
 849}
 850
 851static inline struct page *find_get_page_flags(struct address_space *mapping,
 852                                        pgoff_t offset, fgf_t fgp_flags)
 853{
 854        return pagecache_get_page(mapping, offset, fgp_flags, 0);
 855}
 856
 857/**
 858 * find_lock_page - locate, pin and lock a pagecache page
 859 * @mapping: the address_space to search
 860 * @index: the page index
 861 *
 862 * Looks up the page cache entry at @mapping & @index.  If there is a
 863 * page cache page, it is returned locked and with an increased
 864 * refcount.
 865 *
 866 * Context: May sleep.
 867 * Return: A struct page or %NULL if there is no page in the cache for this
 868 * index.
 869 */
 870static inline struct page *find_lock_page(struct address_space *mapping,
 871                                        pgoff_t index)
 872{
 873        return pagecache_get_page(mapping, index, FGP_LOCK, 0);
 874}
 875
 876/**
 877 * find_or_create_page - locate or add a pagecache page
 878 * @mapping: the page's address_space
 879 * @index: the page's index into the mapping
 880 * @gfp_mask: page allocation mode
 881 *
 882 * Looks up the page cache slot at @mapping & @offset.  If there is a
 883 * page cache page, it is returned locked and with an increased
 884 * refcount.
 885 *
 886 * If the page is not present, a new page is allocated using @gfp_mask
 887 * and added to the page cache and the VM's LRU list.  The page is
 888 * returned locked and with an increased refcount.
 889 *
 890 * On memory exhaustion, %NULL is returned.
 891 *
 892 * find_or_create_page() may sleep, even if @gfp_flags specifies an
 893 * atomic allocation!
 894 */
 895static inline struct page *find_or_create_page(struct address_space *mapping,
 896                                        pgoff_t index, gfp_t gfp_mask)
 897{
 898        return pagecache_get_page(mapping, index,
 899                                        FGP_LOCK|FGP_ACCESSED|FGP_CREAT,
 900                                        gfp_mask);
 901}
 902
 903/**
 904 * grab_cache_page_nowait - returns locked page at given index in given cache
 905 * @mapping: target address_space
 906 * @index: the page index
 907 *
 908 * Returns locked page at given index in given cache, creating it if
 909 * needed, but do not wait if the page is locked or to reclaim memory.
 910 * This is intended for speculative data generators, where the data can
 911 * be regenerated if the page couldn't be grabbed.  This routine should
 912 * be safe to call while holding the lock for another page.
 913 *
 914 * Clear __GFP_FS when allocating the page to avoid recursion into the fs
 915 * and deadlock against the caller's locked page.
 916 */
 917static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
 918                                pgoff_t index)
 919{
 920        return pagecache_get_page(mapping, index,
 921                        FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT,
 922                        mapping_gfp_mask(mapping));
 923}
 924
 925/**
 926 * folio_next_index - Get the index of the next folio.
 927 * @folio: The current folio.
 928 *
 929 * Return: The index of the folio which follows this folio in the file.
 930 */
 931static inline pgoff_t folio_next_index(struct folio *folio)
 932{
 933        return folio->index + folio_nr_pages(folio);
 934}
 935
 936/**
 937 * folio_file_page - The page for a particular index.
 938 * @folio: The folio which contains this index.
 939 * @index: The index we want to look up.
 940 *
 941 * Sometimes after looking up a folio in the page cache, we need to
 942 * obtain the specific page for an index (eg a page fault).
 943 *
 944 * Return: The page containing the file data for this index.
 945 */
 946static inline struct page *folio_file_page(struct folio *folio, pgoff_t index)
 947{
 948        return folio_page(folio, index & (folio_nr_pages(folio) - 1));
 949}
 950
 951/**
 952 * folio_contains - Does this folio contain this index?
 953 * @folio: The folio.
 954 * @index: The page index within the file.
 955 *
 956 * Context: The caller should have the folio locked and ensure
 957 * e.g., shmem did not move this folio to the swap cache.
 958 * Return: true or false.
 959 */
 960static inline bool folio_contains(struct folio *folio, pgoff_t index)
 961{
 962        VM_WARN_ON_ONCE_FOLIO(folio_test_swapcache(folio), folio);
 963        return index - folio->index < folio_nr_pages(folio);
 964}
 965
 966unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start,
 967                pgoff_t end, struct folio_batch *fbatch);
 968unsigned filemap_get_folios_contig(struct address_space *mapping,
 969                pgoff_t *start, pgoff_t end, struct folio_batch *fbatch);
 970unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start,
 971                pgoff_t end, xa_mark_t tag, struct folio_batch *fbatch);
 972
 973struct folio *read_cache_folio(struct address_space *, pgoff_t index,
 974                filler_t *filler, struct file *file);
 975struct folio *mapping_read_folio_gfp(struct address_space *, pgoff_t index,
 976                gfp_t flags);
 977struct page *read_cache_page(struct address_space *, pgoff_t index,
 978                filler_t *filler, struct file *file);
 979extern struct page * read_cache_page_gfp(struct address_space *mapping,
 980                                pgoff_t index, gfp_t gfp_mask);
 981
 982static inline struct page *read_mapping_page(struct address_space *mapping,
 983                                pgoff_t index, struct file *file)
 984{
 985        return read_cache_page(mapping, index, NULL, file);
 986}
 987
 988static inline struct folio *read_mapping_folio(struct address_space *mapping,
 989                                pgoff_t index, struct file *file)
 990{
 991        return read_cache_folio(mapping, index, NULL, file);
 992}
 993
 994/**
 995 * page_pgoff - Calculate the logical page offset of this page.
 996 * @folio: The folio containing this page.
 997 * @page: The page which we need the offset of.
 998 *
 999 * For file pages, this is the offset from the beginning of the file
1000 * in units of PAGE_SIZE.  For anonymous pages, this is the offset from
1001 * the beginning of the anon_vma in units of PAGE_SIZE.  This will
1002 * return nonsense for KSM pages.
1003 *
1004 * Context: Caller must have a reference on the folio or otherwise
1005 * prevent it from being split or freed.
1006 *
1007 * Return: The offset in units of PAGE_SIZE.
1008 */
1009static inline pgoff_t page_pgoff(const struct folio *folio,
1010                const struct page *page)
1011{
1012        return folio->index + folio_page_idx(folio, page);
1013}
1014
1015/**
1016 * folio_pos - Returns the byte position of this folio in its file.
1017 * @folio: The folio.
1018 */
1019static inline loff_t folio_pos(const struct folio *folio)
1020{
1021        return ((loff_t)folio->index) * PAGE_SIZE;
1022}
1023
1024/*
1025 * Return byte-offset into filesystem object for page.
1026 */
1027static inline loff_t page_offset(struct page *page)
1028{
1029        struct folio *folio = page_folio(page);
1030
1031        return folio_pos(folio) + folio_page_idx(folio, page) * PAGE_SIZE;
1032}
1033
1034/*
1035 * Get the offset in PAGE_SIZE (even for hugetlb folios).
1036 */
1037static inline pgoff_t folio_pgoff(struct folio *folio)
1038{
1039        return folio->index;
1040}
1041
1042static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
1043                                        unsigned long address)
1044{
1045        pgoff_t pgoff;
1046        pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
1047        pgoff += vma->vm_pgoff;
1048        return pgoff;
1049}
1050
/*
 * Key that queued waiters are compared against in wake_page_match().
 */
struct wait_page_key {
	struct folio *folio;	/* folio to match */
	int bit_nr;		/* bit number to match */
	int page_match;		/* set to 1 once a waiter on @folio was seen */
};
1056
1057struct wait_page_queue {
1058        struct folio *folio;
1059        int bit_nr;
1060        wait_queue_entry_t wait;
1061};
1062
1063static inline bool wake_page_match(struct wait_page_queue *wait_page,
1064                                  struct wait_page_key *key)
1065{
1066        if (wait_page->folio != key->folio)
1067               return false;
1068        key->page_match = 1;
1069
1070        if (wait_page->bit_nr != key->bit_nr)
1071                return false;
1072
1073        return true;
1074}
1075
1076void __folio_lock(struct folio *folio);
1077int __folio_lock_killable(struct folio *folio);
1078vm_fault_t __folio_lock_or_retry(struct folio *folio, struct vm_fault *vmf);
1079void unlock_page(struct page *page);
1080void folio_unlock(struct folio *folio);
1081
1082/**
1083 * folio_trylock() - Attempt to lock a folio.
1084 * @folio: The folio to attempt to lock.
1085 *
1086 * Sometimes it is undesirable to wait for a folio to be unlocked (eg
1087 * when the locks are being taken in the wrong order, or if making
1088 * progress through a batch of folios is more important than processing
1089 * them in order).  Usually folio_lock() is the correct function to call.
1090 *
1091 * Context: Any context.
1092 * Return: Whether the lock was successfully acquired.
1093 */
1094static inline bool folio_trylock(struct folio *folio)
1095{
1096        return likely(!test_and_set_bit_lock(PG_locked, folio_flags(folio, 0)));
1097}
1098
1099/*
1100 * Return true if the page was successfully locked
1101 */
1102static inline bool trylock_page(struct page *page)
1103{
1104        return folio_trylock(page_folio(page));
1105}
1106
/**
 * folio_lock() - Lock this folio.
 * @folio: The folio to lock.
 *
 * The folio lock protects against many things, probably more than it
 * should.  It is primarily held while a folio is being brought uptodate,
 * either from its backing file or from swap.  It is also held while a
 * folio is being truncated from its address_space, so holding the lock
 * is sufficient to keep folio->mapping stable.
 *
 * The folio lock is also held while write() is modifying the page to
 * provide POSIX atomicity guarantees (as long as the write does not
 * cross a page boundary).  Other modifications to the data in the folio
 * do not hold the folio lock and can race with writes, eg DMA and stores
 * to mapped pages.
 *
 * Context: May sleep.  If you need to acquire the locks of two or
 * more folios, they must be in order of ascending index, if they are
 * in the same address_space.  If they are in different address_spaces,
 * acquire the lock of the folio which belongs to the address_space which
 * has the lowest address in memory first.
 */
static inline void folio_lock(struct folio *folio)
{
	might_sleep();

	/* Fast path: the lock was free. */
	if (folio_trylock(folio))
		return;

	__folio_lock(folio);
}
1135
/**
 * lock_page() - Lock the folio containing this page.
 * @page: The page to lock.
 *
 * What the lock protects is described at folio_lock().  This is a legacy
 * function and new code should probably use folio_lock() instead.
 *
 * Context: May sleep.  Pages in the same folio share a lock, so do not
 * attempt to lock two pages which share a folio.
 */
static inline void lock_page(struct page *page)
{
	struct folio *folio;

	might_sleep();

	folio = page_folio(page);

	/* Fast path: the lock was free. */
	if (folio_trylock(folio))
		return;

	__folio_lock(folio);
}
1156
/**
 * folio_lock_killable() - Lock this folio, interruptible by a fatal signal.
 * @folio: The folio to lock.
 *
 * Behaves like folio_lock(), except that the sleep to acquire the lock
 * can be interrupted by a fatal signal.
 *
 * Context: May sleep; see folio_lock().
 * Return: 0 if the lock was acquired; -EINTR if a fatal signal was received.
 */
static inline int folio_lock_killable(struct folio *folio)
{
	might_sleep();

	/* Fast path: the lock was free. */
	if (folio_trylock(folio))
		return 0;

	return __folio_lock_killable(folio);
}
1174
1175/*
1176 * folio_lock_or_retry - Lock the folio, unless this would block and the
1177 * caller indicated that it can handle a retry.
1178 *
1179 * Return value and mmap_lock implications depend on flags; see
1180 * __folio_lock_or_retry().
1181 */
1182static inline vm_fault_t folio_lock_or_retry(struct folio *folio,
1183                                             struct vm_fault *vmf)
1184{
1185        might_sleep();
1186        if (!folio_trylock(folio))
1187                return __folio_lock_or_retry(folio, vmf);
1188        return 0;
1189}
1190
/*
 * Exported only for the benefit of folio_wait_locked(),
 * folio_wait_writeback() and similar helpers; do not call these directly.
 */
void folio_wait_bit(struct folio *folio, int bit_nr);
int folio_wait_bit_killable(struct folio *folio, int bit_nr);
1197
1198/* 
1199 * Wait for a folio to be unlocked.
1200 *
1201 * This must be called with the caller "holding" the folio,
1202 * ie with increased folio reference count so that the folio won't
1203 * go away during the wait.
1204 */
1205static inline void folio_wait_locked(struct folio *folio)
1206{
1207        if (folio_test_locked(folio))
1208                folio_wait_bit(folio, PG_locked);
1209}
1210
1211static inline int folio_wait_locked_killable(struct folio *folio)
1212{
1213        if (!folio_test_locked(folio))
1214                return 0;
1215        return folio_wait_bit_killable(folio, PG_locked);
1216}
1217
1218void folio_end_read(struct folio *folio, bool success);
1219void wait_on_page_writeback(struct page *page);
1220void folio_wait_writeback(struct folio *folio);
1221int folio_wait_writeback_killable(struct folio *folio);
1222void end_page_writeback(struct page *page);
1223void folio_end_writeback(struct folio *folio);
1224void folio_wait_stable(struct folio *folio);
1225void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn);
1226void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb);
1227void __folio_cancel_dirty(struct folio *folio);
/*
 * Cancel the dirty state of @folio.  The out-of-line
 * __folio_cancel_dirty() is only called when the folio tests dirty.
 */
static inline void folio_cancel_dirty(struct folio *folio)
{
	/* Avoid atomic ops, locking, etc. when not actually needed. */
	if (!folio_test_dirty(folio))
		return;

	__folio_cancel_dirty(folio);
}
1234bool folio_clear_dirty_for_io(struct folio *folio);
1235bool clear_page_dirty_for_io(struct page *page);
1236void folio_invalidate(struct folio *folio, size_t offset, size_t length);
1237bool noop_dirty_folio(struct address_space *mapping, struct folio *folio);
1238
/*
 * filemap_migrate_folio() is only declared as a function when
 * CONFIG_MIGRATION is set; otherwise the name expands to NULL.
 */
#ifdef CONFIG_MIGRATION
int filemap_migrate_folio(struct address_space *mapping, struct folio *dst,
		struct folio *src, enum migrate_mode mode);
#else
#define filemap_migrate_folio NULL
#endif

/* "private_2" end/wait helpers, defined outside this header. */
void folio_end_private_2(struct folio *folio);
void folio_wait_private_2(struct folio *folio);
int folio_wait_private_2_killable(struct folio *folio);
1248
1249/*
1250 * Fault in userspace address range.
1251 */
1252size_t fault_in_writeable(char __user *uaddr, size_t size);
1253size_t fault_in_subpage_writeable(char __user *uaddr, size_t size);
1254size_t fault_in_safe_writeable(const char __user *uaddr, size_t size);
1255size_t fault_in_readable(const char __user *uaddr, size_t size);
1256
1257int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
1258                pgoff_t index, gfp_t gfp);
1259int filemap_add_folio(struct address_space *mapping, struct folio *folio,
1260                pgoff_t index, gfp_t gfp);
1261void filemap_remove_folio(struct folio *folio);
1262void __filemap_remove_folio(struct folio *folio, void *shadow);
1263void replace_page_cache_folio(struct folio *old, struct folio *new);
1264void delete_from_page_cache_batch(struct address_space *mapping,
1265                                  struct folio_batch *fbatch);
1266bool filemap_release_folio(struct folio *folio, gfp_t gfp);
1267loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end,
1268                int whence);
1269
1270/* Must be non-static for BPF error injection */
1271int __filemap_add_folio(struct address_space *mapping, struct folio *folio,
1272                pgoff_t index, gfp_t gfp, void **shadowp);
1273
1274bool filemap_range_has_writeback(struct address_space *mapping,
1275                                 loff_t start_byte, loff_t end_byte);
1276
1277/**
1278 * filemap_range_needs_writeback - check if range potentially needs writeback
1279 * @mapping:           address space within which to check
1280 * @start_byte:        offset in bytes where the range starts
1281 * @end_byte:          offset in bytes where the range ends (inclusive)
1282 *
1283 * Find at least one page in the range supplied, usually used to check if
1284 * direct writing in this range will trigger a writeback. Used by O_DIRECT
1285 * read/write with IOCB_NOWAIT, to see if the caller needs to do
1286 * filemap_write_and_wait_range() before proceeding.
1287 *
1288 * Return: %true if the caller should do filemap_write_and_wait_range() before
1289 * doing O_DIRECT to a page in this range, %false otherwise.
1290 */
1291static inline bool filemap_range_needs_writeback(struct address_space *mapping,
1292                                                 loff_t start_byte,
1293                                                 loff_t end_byte)
1294{
1295        if (!mapping->nrpages)
1296                return false;
1297        if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
1298            !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
1299                return false;
1300        return filemap_range_has_writeback(mapping, start_byte, end_byte);
1301}
1302
1303/**
1304 * struct readahead_control - Describes a readahead request.
1305 *
1306 * A readahead request is for consecutive pages.  Filesystems which
1307 * implement the ->readahead method should call readahead_folio() or
1308 * __readahead_batch() in a loop and attempt to start reads into each
1309 * folio in the request.
1310 *
1311 * Most of the fields in this struct are private and should be accessed
1312 * by the functions below.
1313 *
1314 * @file: The file, used primarily by network filesystems for authentication.
1315 *        May be NULL if invoked internally by the filesystem.
1316 * @mapping: Readahead this filesystem object.
1317 * @ra: File readahead state.  May be NULL.
1318 */
1319struct readahead_control {
1320        struct file *file;
1321        struct address_space *mapping;
1322        struct file_ra_state *ra;
1323/* private: use the readahead_* accessors instead */
1324        pgoff_t _index;
1325        unsigned int _nr_pages;
1326        unsigned int _batch_count;
1327        bool dropbehind;
1328        bool _workingset;
1329        unsigned long _pflags;
1330};
1331
/*
 * Declare a struct readahead_control named @ractl with file @f, readahead
 * state @r, mapping @m and starting index @i.  Fields not listed are
 * zero-initialised by the designated initializer.
 */
#define DEFINE_READAHEAD(ractl, f, r, m, i)				\
	struct readahead_control ractl = {				\
		.file = (f),						\
		.mapping = (m),						\
		.ra = (r),						\
		._index = (i),						\
	}
1339
1340#define VM_READAHEAD_PAGES      (SZ_128K / PAGE_SIZE)
1341
1342void page_cache_ra_unbounded(struct readahead_control *,
1343                unsigned long nr_to_read, unsigned long lookahead_count);
1344void page_cache_sync_ra(struct readahead_control *, unsigned long req_count);
1345void page_cache_async_ra(struct readahead_control *, struct folio *,
1346                unsigned long req_count);
1347void readahead_expand(struct readahead_control *ractl,
1348                      loff_t new_start, size_t new_len);
1349
1350/**
1351 * page_cache_sync_readahead - generic file readahead
1352 * @mapping: address_space which holds the pagecache and I/O vectors
1353 * @ra: file_ra_state which holds the readahead state
1354 * @file: Used by the filesystem for authentication.
1355 * @index: Index of first page to be read.
1356 * @req_count: Total number of pages being read by the caller.
1357 *
1358 * page_cache_sync_readahead() should be called when a cache miss happened:
1359 * it will submit the read.  The readahead logic may decide to piggyback more
1360 * pages onto the read request if access patterns suggest it will improve
1361 * performance.
1362 */
1363static inline
1364void page_cache_sync_readahead(struct address_space *mapping,
1365                struct file_ra_state *ra, struct file *file, pgoff_t index,
1366                unsigned long req_count)
1367{
1368        DEFINE_READAHEAD(ractl, file, ra, mapping, index);
1369        page_cache_sync_ra(&ractl, req_count);
1370}
1371
1372/**
1373 * page_cache_async_readahead - file readahead for marked pages
1374 * @mapping: address_space which holds the pagecache and I/O vectors
1375 * @ra: file_ra_state which holds the readahead state
1376 * @file: Used by the filesystem for authentication.
1377 * @folio: The folio which triggered the readahead call.
1378 * @req_count: Total number of pages being read by the caller.
1379 *
1380 * page_cache_async_readahead() should be called when a page is used which
1381 * is marked as PageReadahead; this is a marker to suggest that the application
1382 * has used up enough of the readahead window that we should start pulling in
1383 * more pages.
1384 */
1385static inline
1386void page_cache_async_readahead(struct address_space *mapping,
1387                struct file_ra_state *ra, struct file *file,
1388                struct folio *folio, unsigned long req_count)
1389{
1390        DEFINE_READAHEAD(ractl, file, ra, mapping, folio->index);
1391        page_cache_async_ra(&ractl, folio, req_count);
1392}
1393
1394static inline struct folio *__readahead_folio(struct readahead_control *ractl)
1395{
1396        struct folio *folio;
1397
1398        BUG_ON(ractl->_batch_count > ractl->_nr_pages);
1399        ractl->_nr_pages -= ractl->_batch_count;
1400        ractl->_index += ractl->_batch_count;
1401
1402        if (!ractl->_nr_pages) {
1403                ractl->_batch_count = 0;
1404                return NULL;
1405        }
1406
1407        folio = xa_load(&ractl->mapping->i_pages, ractl->_index);
1408        VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
1409        ractl->_batch_count = folio_nr_pages(folio);
1410
1411        return folio;
1412}
1413
1414/**
1415 * readahead_folio - Get the next folio to read.
1416 * @ractl: The current readahead request.
1417 *
1418 * Context: The folio is locked.  The caller should unlock the folio once
1419 * all I/O to that folio has completed.
1420 * Return: A pointer to the next folio, or %NULL if we are done.
1421 */
1422static inline struct folio *readahead_folio(struct readahead_control *ractl)
1423{
1424        struct folio *folio = __readahead_folio(ractl);
1425
1426        if (folio)
1427                folio_put(folio);
1428        return folio;
1429}
1430
1431static inline unsigned int __readahead_batch(struct readahead_control *rac,
1432                struct page **array, unsigned int array_sz)
1433{
1434        unsigned int i = 0;
1435        XA_STATE(xas, &rac->mapping->i_pages, 0);
1436        struct folio *folio;
1437
1438        BUG_ON(rac->_batch_count > rac->_nr_pages);
1439        rac->_nr_pages -= rac->_batch_count;
1440        rac->_index += rac->_batch_count;
1441        rac->_batch_count = 0;
1442
1443        xas_set(&xas, rac->_index);
1444        rcu_read_lock();
1445        xas_for_each(&xas, folio, rac->_index + rac->_nr_pages - 1) {
1446                if (xas_retry(&xas, folio))
1447                        continue;
1448                VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
1449                array[i++] = folio_page(folio, 0);
1450                rac->_batch_count += folio_nr_pages(folio);
1451                if (i == array_sz)
1452                        break;
1453        }
1454        rcu_read_unlock();
1455
1456        return i;
1457}
1458
1459/**
1460 * readahead_pos - The byte offset into the file of this readahead request.
1461 * @rac: The readahead request.
1462 */
1463static inline loff_t readahead_pos(struct readahead_control *rac)
1464{
1465        return (loff_t)rac->_index * PAGE_SIZE;
1466}
1467
1468/**
1469 * readahead_length - The number of bytes in this readahead request.
1470 * @rac: The readahead request.
1471 */
1472static inline size_t readahead_length(struct readahead_control *rac)
1473{
1474        return rac->_nr_pages * PAGE_SIZE;
1475}
1476
1477/**
1478 * readahead_index - The index of the first page in this readahead request.
1479 * @rac: The readahead request.
1480 */
1481static inline pgoff_t readahead_index(struct readahead_control *rac)
1482{
1483        return rac->_index;
1484}
1485
1486/**
1487 * readahead_count - The number of pages in this readahead request.
1488 * @rac: The readahead request.
1489 */
1490static inline unsigned int readahead_count(struct readahead_control *rac)
1491{
1492        return rac->_nr_pages;
1493}
1494
1495/**
1496 * readahead_batch_length - The number of bytes in the current batch.
1497 * @rac: The readahead request.
1498 */
1499static inline size_t readahead_batch_length(struct readahead_control *rac)
1500{
1501        return rac->_batch_count * PAGE_SIZE;
1502}
1503
1504static inline unsigned long dir_pages(struct inode *inode)
1505{
1506        return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >>
1507                               PAGE_SHIFT;
1508}
1509
1510/**
1511 * folio_mkwrite_check_truncate - check if folio was truncated
1512 * @folio: the folio to check
1513 * @inode: the inode to check the folio against
1514 *
1515 * Return: the number of bytes in the folio up to EOF,
1516 * or -EFAULT if the folio was truncated.
1517 */
1518static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio,
1519                                              struct inode *inode)
1520{
1521        loff_t size = i_size_read(inode);
1522        pgoff_t index = size >> PAGE_SHIFT;
1523        size_t offset = offset_in_folio(folio, size);
1524
1525        if (!folio->mapping)
1526                return -EFAULT;
1527
1528        /* folio is wholly inside EOF */
1529        if (folio_next_index(folio) - 1 < index)
1530                return folio_size(folio);
1531        /* folio is wholly past EOF */
1532        if (folio->index > index || !offset)
1533                return -EFAULT;
1534        /* folio is partially inside EOF */
1535        return offset;
1536}
1537
1538/**
1539 * i_blocks_per_folio - How many blocks fit in this folio.
1540 * @inode: The inode which contains the blocks.
1541 * @folio: The folio.
1542 *
1543 * If the block size is larger than the size of this folio, return zero.
1544 *
1545 * Context: The caller should hold a refcount on the folio to prevent it
1546 * from being split.
1547 * Return: The number of filesystem blocks covered by this folio.
1548 */
1549static inline
1550unsigned int i_blocks_per_folio(struct inode *inode, struct folio *folio)
1551{
1552        return folio_size(folio) >> inode->i_blkbits;
1553}
1554#endif /* _LINUX_PAGEMAP_H */
1555