linux/mm/fadvise.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * mm/fadvise.c
   4 *
   5 * Copyright (C) 2002, Linus Torvalds
   6 *
   7 * 11Jan2003    Andrew Morton
   8 *              Initial version.
   9 */
  10
  11#include <linux/kernel.h>
  12#include <linux/file.h>
  13#include <linux/fs.h>
  14#include <linux/mm.h>
  15#include <linux/pagemap.h>
  16#include <linux/backing-dev.h>
  17#include <linux/pagevec.h>
  18#include <linux/fadvise.h>
  19#include <linux/writeback.h>
  20#include <linux/syscalls.h>
  21#include <linux/swap.h>
  22
  23#include <asm/unistd.h>
  24
  25#include "internal.h"
  26
  27/*
  28 * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
  29 * deactivate the pages and clear PG_Referenced.
  30 */
  31
  32int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
  33{
  34        struct inode *inode;
  35        struct address_space *mapping;
  36        struct backing_dev_info *bdi;
  37        loff_t endbyte;                 /* inclusive */
  38        pgoff_t start_index;
  39        pgoff_t end_index;
  40        unsigned long nrpages;
  41
  42        inode = file_inode(file);
  43        if (S_ISFIFO(inode->i_mode))
  44                return -ESPIPE;
  45
  46        mapping = file->f_mapping;
  47        if (!mapping || len < 0)
  48                return -EINVAL;
  49
  50        bdi = inode_to_bdi(mapping->host);
  51
  52        if (IS_DAX(inode) || (bdi == &noop_backing_dev_info)) {
  53                switch (advice) {
  54                case POSIX_FADV_NORMAL:
  55                case POSIX_FADV_RANDOM:
  56                case POSIX_FADV_SEQUENTIAL:
  57                case POSIX_FADV_WILLNEED:
  58                case POSIX_FADV_NOREUSE:
  59                case POSIX_FADV_DONTNEED:
  60                        /* no bad return value, but ignore advice */
  61                        break;
  62                default:
  63                        return -EINVAL;
  64                }
  65                return 0;
  66        }
  67
  68        /*
  69         * Careful about overflows. Len == 0 means "as much as possible".  Use
  70         * unsigned math because signed overflows are undefined and UBSan
  71         * complains.
  72         */
  73        endbyte = (u64)offset + (u64)len;
  74        if (!len || endbyte < len)
  75                endbyte = -1;
  76        else
  77                endbyte--;              /* inclusive */
  78
  79        switch (advice) {
  80        case POSIX_FADV_NORMAL:
  81                file->f_ra.ra_pages = bdi->ra_pages;
  82                spin_lock(&file->f_lock);
  83                file->f_mode &= ~FMODE_RANDOM;
  84                spin_unlock(&file->f_lock);
  85                break;
  86        case POSIX_FADV_RANDOM:
  87                spin_lock(&file->f_lock);
  88                file->f_mode |= FMODE_RANDOM;
  89                spin_unlock(&file->f_lock);
  90                break;
  91        case POSIX_FADV_SEQUENTIAL:
  92                file->f_ra.ra_pages = bdi->ra_pages * 2;
  93                spin_lock(&file->f_lock);
  94                file->f_mode &= ~FMODE_RANDOM;
  95                spin_unlock(&file->f_lock);
  96                break;
  97        case POSIX_FADV_WILLNEED:
  98                /* First and last PARTIAL page! */
  99                start_index = offset >> PAGE_SHIFT;
 100                end_index = endbyte >> PAGE_SHIFT;
 101
 102                /* Careful about overflow on the "+1" */
 103                nrpages = end_index - start_index + 1;
 104                if (!nrpages)
 105                        nrpages = ~0UL;
 106
 107                force_page_cache_readahead(mapping, file, start_index, nrpages);
 108                break;
 109        case POSIX_FADV_NOREUSE:
 110                break;
 111        case POSIX_FADV_DONTNEED:
 112                if (!inode_write_congested(mapping->host))
 113                        __filemap_fdatawrite_range(mapping, offset, endbyte,
 114                                                   WB_SYNC_NONE);
 115
 116                /*
 117                 * First and last FULL page! Partial pages are deliberately
 118                 * preserved on the expectation that it is better to preserve
 119                 * needed memory than to discard unneeded memory.
 120                 */
 121                start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT;
 122                end_index = (endbyte >> PAGE_SHIFT);
 123                /*
 124                 * The page at end_index will be inclusively discarded according
 125                 * by invalidate_mapping_pages(), so subtracting 1 from
 126                 * end_index means we will skip the last page.  But if endbyte
 127                 * is page aligned or is at the end of file, we should not skip
 128                 * that page - discarding the last page is safe enough.
 129                 */
 130                if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK &&
 131                                endbyte != inode->i_size - 1) {
 132                        /* First page is tricky as 0 - 1 = -1, but pgoff_t
 133                         * is unsigned, so the end_index >= start_index
 134                         * check below would be true and we'll discard the whole
 135                         * file cache which is not what was asked.
 136                         */
 137                        if (end_index == 0)
 138                                break;
 139
 140                        end_index--;
 141                }
 142
 143                if (end_index >= start_index) {
 144                        unsigned long nr_pagevec = 0;
 145
 146                        /*
 147                         * It's common to FADV_DONTNEED right after
 148                         * the read or write that instantiates the
 149                         * pages, in which case there will be some
 150                         * sitting on the local LRU cache. Try to
 151                         * avoid the expensive remote drain and the
 152                         * second cache tree walk below by flushing
 153                         * them out right away.
 154                         */
 155                        lru_add_drain();
 156
 157                        invalidate_mapping_pagevec(mapping,
 158                                                start_index, end_index,
 159                                                &nr_pagevec);
 160
 161                        /*
 162                         * If fewer pages were invalidated than expected then
 163                         * it is possible that some of the pages were on
 164                         * a per-cpu pagevec for a remote CPU. Drain all
 165                         * pagevecs and try again.
 166                         */
 167                        if (nr_pagevec) {
 168                                lru_add_drain_all();
 169                                invalidate_mapping_pages(mapping, start_index,
 170                                                end_index);
 171                        }
 172                }
 173                break;
 174        default:
 175                return -EINVAL;
 176        }
 177        return 0;
 178}
 179EXPORT_SYMBOL(generic_fadvise);
 180
 181int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
 182{
 183        if (file->f_op->fadvise)
 184                return file->f_op->fadvise(file, offset, len, advice);
 185
 186        return generic_fadvise(file, offset, len, advice);
 187}
 188EXPORT_SYMBOL(vfs_fadvise);
 189
 190#ifdef CONFIG_ADVISE_SYSCALLS
 191
 192int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
 193{
 194        struct fd f = fdget(fd);
 195        int ret;
 196
 197        if (!f.file)
 198                return -EBADF;
 199
 200        ret = vfs_fadvise(f.file, offset, len, advice);
 201
 202        fdput(f);
 203        return ret;
 204}
 205
 206SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
 207{
 208        return ksys_fadvise64_64(fd, offset, len, advice);
 209}
 210
 211#ifdef __ARCH_WANT_SYS_FADVISE64
 212
 213SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice)
 214{
 215        return ksys_fadvise64_64(fd, offset, len, advice);
 216}
 217
 218#endif
 219#endif
 220