linux/mm/fadvise.c
<<
>>
Prefs
/*
 * mm/fadvise.c
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 11Jan2003	Andrew Morton
 *		Initial version.
 */
   9
  10#include <linux/kernel.h>
  11#include <linux/file.h>
  12#include <linux/fs.h>
  13#include <linux/mm.h>
  14#include <linux/pagemap.h>
  15#include <linux/backing-dev.h>
  16#include <linux/pagevec.h>
  17#include <linux/fadvise.h>
  18#include <linux/writeback.h>
  19#include <linux/syscalls.h>
  20#include <linux/swap.h>
  21
  22#include <asm/unistd.h>
  23
  24/*
  25 * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
  26 * deactivate the pages and clear PG_Referenced.
  27 */
  28SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
  29{
  30        struct fd f = fdget(fd);
  31        struct inode *inode;
  32        struct address_space *mapping;
  33        struct backing_dev_info *bdi;
  34        loff_t endbyte;                 /* inclusive */
  35        pgoff_t start_index;
  36        pgoff_t end_index;
  37        unsigned long nrpages;
  38        int ret = 0;
  39
  40        if (!f.file)
  41                return -EBADF;
  42
  43        inode = file_inode(f.file);
  44        if (S_ISFIFO(inode->i_mode)) {
  45                ret = -ESPIPE;
  46                goto out;
  47        }
  48
  49        mapping = f.file->f_mapping;
  50        if (!mapping || len < 0) {
  51                ret = -EINVAL;
  52                goto out;
  53        }
  54
  55        if (IS_DAX(inode)) {
  56                switch (advice) {
  57                case POSIX_FADV_NORMAL:
  58                case POSIX_FADV_RANDOM:
  59                case POSIX_FADV_SEQUENTIAL:
  60                case POSIX_FADV_WILLNEED:
  61                case POSIX_FADV_NOREUSE:
  62                case POSIX_FADV_DONTNEED:
  63                        /* no bad return value, but ignore advice */
  64                        break;
  65                default:
  66                        ret = -EINVAL;
  67                }
  68                goto out;
  69        }
  70
  71        /* Careful about overflows. Len == 0 means "as much as possible" */
  72        endbyte = offset + len;
  73        if (!len || endbyte < len)
  74                endbyte = -1;
  75        else
  76                endbyte--;              /* inclusive */
  77
  78        bdi = inode_to_bdi(mapping->host);
  79
  80        switch (advice) {
  81        case POSIX_FADV_NORMAL:
  82                f.file->f_ra.ra_pages = bdi->ra_pages;
  83                spin_lock(&f.file->f_lock);
  84                f.file->f_mode &= ~FMODE_RANDOM;
  85                spin_unlock(&f.file->f_lock);
  86                break;
  87        case POSIX_FADV_RANDOM:
  88                spin_lock(&f.file->f_lock);
  89                f.file->f_mode |= FMODE_RANDOM;
  90                spin_unlock(&f.file->f_lock);
  91                break;
  92        case POSIX_FADV_SEQUENTIAL:
  93                f.file->f_ra.ra_pages = bdi->ra_pages * 2;
  94                spin_lock(&f.file->f_lock);
  95                f.file->f_mode &= ~FMODE_RANDOM;
  96                spin_unlock(&f.file->f_lock);
  97                break;
  98        case POSIX_FADV_WILLNEED:
  99                /* First and last PARTIAL page! */
 100                start_index = offset >> PAGE_SHIFT;
 101                end_index = endbyte >> PAGE_SHIFT;
 102
 103                /* Careful about overflow on the "+1" */
 104                nrpages = end_index - start_index + 1;
 105                if (!nrpages)
 106                        nrpages = ~0UL;
 107
 108                /*
 109                 * Ignore return value because fadvise() shall return
 110                 * success even if filesystem can't retrieve a hint,
 111                 */
 112                force_page_cache_readahead(mapping, f.file, start_index,
 113                                           nrpages);
 114                break;
 115        case POSIX_FADV_NOREUSE:
 116                break;
 117        case POSIX_FADV_DONTNEED:
 118                if (!inode_write_congested(mapping->host))
 119                        __filemap_fdatawrite_range(mapping, offset, endbyte,
 120                                                   WB_SYNC_NONE);
 121
 122                /*
 123                 * First and last FULL page! Partial pages are deliberately
 124                 * preserved on the expectation that it is better to preserve
 125                 * needed memory than to discard unneeded memory.
 126                 */
 127                start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT;
 128                end_index = (endbyte >> PAGE_SHIFT);
 129                if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK) {
 130                        /* First page is tricky as 0 - 1 = -1, but pgoff_t
 131                         * is unsigned, so the end_index >= start_index
 132                         * check below would be true and we'll discard the whole
 133                         * file cache which is not what was asked.
 134                         */
 135                        if (end_index == 0)
 136                                break;
 137
 138                        end_index--;
 139                }
 140
 141                if (end_index >= start_index) {
 142                        unsigned long count;
 143
 144                        /*
 145                         * It's common to FADV_DONTNEED right after
 146                         * the read or write that instantiates the
 147                         * pages, in which case there will be some
 148                         * sitting on the local LRU cache. Try to
 149                         * avoid the expensive remote drain and the
 150                         * second cache tree walk below by flushing
 151                         * them out right away.
 152                         */
 153                        lru_add_drain();
 154
 155                        count = invalidate_mapping_pages(mapping,
 156                                                start_index, end_index);
 157
 158                        /*
 159                         * If fewer pages were invalidated than expected then
 160                         * it is possible that some of the pages were on
 161                         * a per-cpu pagevec for a remote CPU. Drain all
 162                         * pagevecs and try again.
 163                         */
 164                        if (count < (end_index - start_index + 1)) {
 165                                lru_add_drain_all();
 166                                invalidate_mapping_pages(mapping, start_index,
 167                                                end_index);
 168                        }
 169                }
 170                break;
 171        default:
 172                ret = -EINVAL;
 173        }
 174out:
 175        fdput(f);
 176        return ret;
 177}
 178
 179#ifdef __ARCH_WANT_SYS_FADVISE64
 180
 181SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice)
 182{
 183        return sys_fadvise64_64(fd, offset, len, advice);
 184}
 185
 186#endif
 187