linux/mm/fadvise.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * mm/fadvise.c
   4 *
   5 * Copyright (C) 2002, Linus Torvalds
   6 *
   7 * 11Jan2003    Andrew Morton
   8 *              Initial version.
   9 */
  10
  11#include <linux/kernel.h>
  12#include <linux/file.h>
  13#include <linux/fs.h>
  14#include <linux/mm.h>
  15#include <linux/pagemap.h>
  16#include <linux/backing-dev.h>
  17#include <linux/pagevec.h>
  18#include <linux/fadvise.h>
  19#include <linux/writeback.h>
  20#include <linux/syscalls.h>
  21#include <linux/swap.h>
  22
  23#include <asm/unistd.h>
  24
  25#include "internal.h"
  26
  27/*
  28 * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
  29 * deactivate the pages and clear PG_Referenced.
  30 */
  31
  32int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
  33{
  34        struct inode *inode;
  35        struct address_space *mapping;
  36        struct backing_dev_info *bdi;
  37        loff_t endbyte;                 /* inclusive */
  38        pgoff_t start_index;
  39        pgoff_t end_index;
  40        unsigned long nrpages;
  41
  42        inode = file_inode(file);
  43        if (S_ISFIFO(inode->i_mode))
  44                return -ESPIPE;
  45
  46        mapping = file->f_mapping;
  47        if (!mapping || len < 0)
  48                return -EINVAL;
  49
  50        bdi = inode_to_bdi(mapping->host);
  51
  52        if (IS_DAX(inode) || (bdi == &noop_backing_dev_info)) {
  53                switch (advice) {
  54                case POSIX_FADV_NORMAL:
  55                case POSIX_FADV_RANDOM:
  56                case POSIX_FADV_SEQUENTIAL:
  57                case POSIX_FADV_WILLNEED:
  58                case POSIX_FADV_NOREUSE:
  59                case POSIX_FADV_DONTNEED:
  60                        /* no bad return value, but ignore advice */
  61                        break;
  62                default:
  63                        return -EINVAL;
  64                }
  65                return 0;
  66        }
  67
  68        /*
  69         * Careful about overflows. Len == 0 means "as much as possible".  Use
  70         * unsigned math because signed overflows are undefined and UBSan
  71         * complains.
  72         */
  73        endbyte = (u64)offset + (u64)len;
  74        if (!len || endbyte < len)
  75                endbyte = -1;
  76        else
  77                endbyte--;              /* inclusive */
  78
  79        switch (advice) {
  80        case POSIX_FADV_NORMAL:
  81                file->f_ra.ra_pages = bdi->ra_pages;
  82                spin_lock(&file->f_lock);
  83                file->f_mode &= ~FMODE_RANDOM;
  84                spin_unlock(&file->f_lock);
  85                break;
  86        case POSIX_FADV_RANDOM:
  87                spin_lock(&file->f_lock);
  88                file->f_mode |= FMODE_RANDOM;
  89                spin_unlock(&file->f_lock);
  90                break;
  91        case POSIX_FADV_SEQUENTIAL:
  92                file->f_ra.ra_pages = bdi->ra_pages * 2;
  93                spin_lock(&file->f_lock);
  94                file->f_mode &= ~FMODE_RANDOM;
  95                spin_unlock(&file->f_lock);
  96                break;
  97        case POSIX_FADV_WILLNEED:
  98                /* First and last PARTIAL page! */
  99                start_index = offset >> PAGE_SHIFT;
 100                end_index = endbyte >> PAGE_SHIFT;
 101
 102                /* Careful about overflow on the "+1" */
 103                nrpages = end_index - start_index + 1;
 104                if (!nrpages)
 105                        nrpages = ~0UL;
 106
 107                force_page_cache_readahead(mapping, file, start_index, nrpages);
 108                break;
 109        case POSIX_FADV_NOREUSE:
 110                break;
 111        case POSIX_FADV_DONTNEED:
 112                __filemap_fdatawrite_range(mapping, offset, endbyte,
 113                                           WB_SYNC_NONE);
 114
 115                /*
 116                 * First and last FULL page! Partial pages are deliberately
 117                 * preserved on the expectation that it is better to preserve
 118                 * needed memory than to discard unneeded memory.
 119                 */
 120                start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT;
 121                end_index = (endbyte >> PAGE_SHIFT);
 122                /*
 123                 * The page at end_index will be inclusively discarded according
 124                 * by invalidate_mapping_pages(), so subtracting 1 from
 125                 * end_index means we will skip the last page.  But if endbyte
 126                 * is page aligned or is at the end of file, we should not skip
 127                 * that page - discarding the last page is safe enough.
 128                 */
 129                if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK &&
 130                                endbyte != inode->i_size - 1) {
 131                        /* First page is tricky as 0 - 1 = -1, but pgoff_t
 132                         * is unsigned, so the end_index >= start_index
 133                         * check below would be true and we'll discard the whole
 134                         * file cache which is not what was asked.
 135                         */
 136                        if (end_index == 0)
 137                                break;
 138
 139                        end_index--;
 140                }
 141
 142                if (end_index >= start_index) {
 143                        unsigned long nr_pagevec = 0;
 144
 145                        /*
 146                         * It's common to FADV_DONTNEED right after
 147                         * the read or write that instantiates the
 148                         * pages, in which case there will be some
 149                         * sitting on the local LRU cache. Try to
 150                         * avoid the expensive remote drain and the
 151                         * second cache tree walk below by flushing
 152                         * them out right away.
 153                         */
 154                        lru_add_drain();
 155
 156                        invalidate_mapping_pagevec(mapping,
 157                                                start_index, end_index,
 158                                                &nr_pagevec);
 159
 160                        /*
 161                         * If fewer pages were invalidated than expected then
 162                         * it is possible that some of the pages were on
 163                         * a per-cpu pagevec for a remote CPU. Drain all
 164                         * pagevecs and try again.
 165                         */
 166                        if (nr_pagevec) {
 167                                lru_add_drain_all();
 168                                invalidate_mapping_pages(mapping, start_index,
 169                                                end_index);
 170                        }
 171                }
 172                break;
 173        default:
 174                return -EINVAL;
 175        }
 176        return 0;
 177}
 178EXPORT_SYMBOL(generic_fadvise);
 179
 180int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
 181{
 182        if (file->f_op->fadvise)
 183                return file->f_op->fadvise(file, offset, len, advice);
 184
 185        return generic_fadvise(file, offset, len, advice);
 186}
 187EXPORT_SYMBOL(vfs_fadvise);
 188
 189#ifdef CONFIG_ADVISE_SYSCALLS
 190
 191int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
 192{
 193        struct fd f = fdget(fd);
 194        int ret;
 195
 196        if (!f.file)
 197                return -EBADF;
 198
 199        ret = vfs_fadvise(f.file, offset, len, advice);
 200
 201        fdput(f);
 202        return ret;
 203}
 204
 205SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
 206{
 207        return ksys_fadvise64_64(fd, offset, len, advice);
 208}
 209
 210#ifdef __ARCH_WANT_SYS_FADVISE64
 211
 212SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice)
 213{
 214        return ksys_fadvise64_64(fd, offset, len, advice);
 215}
 216
 217#endif
 218
 219#if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_FADVISE64_64)
 220
 221COMPAT_SYSCALL_DEFINE6(fadvise64_64, int, fd, compat_arg_u64_dual(offset),
 222                       compat_arg_u64_dual(len), int, advice)
 223{
 224        return ksys_fadvise64_64(fd, compat_arg_u64_glue(offset),
 225                                 compat_arg_u64_glue(len), advice);
 226}
 227
 228#endif
 229#endif
 230