linux/mm/page_counter.c
<<
>>
Prefs
   1/*
   2 * Lockless hierarchical page accounting & limiting
   3 *
   4 * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
   5 */
   6
   7#include <linux/page_counter.h>
   8#include <linux/atomic.h>
   9#include <linux/kernel.h>
  10#include <linux/string.h>
  11#include <linux/sched.h>
  12#include <linux/bug.h>
  13#include <asm/page.h>
  14
  15/**
  16 * page_counter_cancel - take pages out of the local counter
  17 * @counter: counter
  18 * @nr_pages: number of pages to cancel
  19 */
  20void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
  21{
  22        long new;
  23
  24        new = atomic_long_sub_return(nr_pages, &counter->count);
  25        /* More uncharges than charges? */
  26        WARN_ON_ONCE(new < 0);
  27}
  28
  29/**
  30 * page_counter_charge - hierarchically charge pages
  31 * @counter: counter
  32 * @nr_pages: number of pages to charge
  33 *
  34 * NOTE: This does not consider any configured counter limits.
  35 */
  36void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
  37{
  38        struct page_counter *c;
  39
  40        for (c = counter; c; c = c->parent) {
  41                long new;
  42
  43                new = atomic_long_add_return(nr_pages, &c->count);
  44                /*
  45                 * This is indeed racy, but we can live with some
  46                 * inaccuracy in the watermark.
  47                 */
  48                if (new > c->watermark)
  49                        c->watermark = new;
  50        }
  51}
  52
  53/**
  54 * page_counter_try_charge - try to hierarchically charge pages
  55 * @counter: counter
  56 * @nr_pages: number of pages to charge
  57 * @fail: points first counter to hit its limit, if any
  58 *
  59 * Returns %true on success, or %false and @fail if the counter or one
  60 * of its ancestors has hit its configured limit.
  61 */
  62bool page_counter_try_charge(struct page_counter *counter,
  63                             unsigned long nr_pages,
  64                             struct page_counter **fail)
  65{
  66        struct page_counter *c;
  67
  68        for (c = counter; c; c = c->parent) {
  69                long new;
  70                /*
  71                 * Charge speculatively to avoid an expensive CAS.  If
  72                 * a bigger charge fails, it might falsely lock out a
  73                 * racing smaller charge and send it into reclaim
  74                 * early, but the error is limited to the difference
  75                 * between the two sizes, which is less than 2M/4M in
  76                 * case of a THP locking out a regular page charge.
  77                 *
  78                 * The atomic_long_add_return() implies a full memory
  79                 * barrier between incrementing the count and reading
  80                 * the limit.  When racing with page_counter_limit(),
  81                 * we either see the new limit or the setter sees the
  82                 * counter has changed and retries.
  83                 */
  84                new = atomic_long_add_return(nr_pages, &c->count);
  85                if (new > c->limit) {
  86                        atomic_long_sub(nr_pages, &c->count);
  87                        /*
  88                         * This is racy, but we can live with some
  89                         * inaccuracy in the failcnt.
  90                         */
  91                        c->failcnt++;
  92                        *fail = c;
  93                        goto failed;
  94                }
  95                /*
  96                 * Just like with failcnt, we can live with some
  97                 * inaccuracy in the watermark.
  98                 */
  99                if (new > c->watermark)
 100                        c->watermark = new;
 101        }
 102        return true;
 103
 104failed:
 105        for (c = counter; c != *fail; c = c->parent)
 106                page_counter_cancel(c, nr_pages);
 107
 108        return false;
 109}
 110
 111/**
 112 * page_counter_uncharge - hierarchically uncharge pages
 113 * @counter: counter
 114 * @nr_pages: number of pages to uncharge
 115 */
 116void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
 117{
 118        struct page_counter *c;
 119
 120        for (c = counter; c; c = c->parent)
 121                page_counter_cancel(c, nr_pages);
 122}
 123
 124/**
 125 * page_counter_limit - limit the number of pages allowed
 126 * @counter: counter
 127 * @limit: limit to set
 128 *
 129 * Returns 0 on success, -EBUSY if the current number of pages on the
 130 * counter already exceeds the specified limit.
 131 *
 132 * The caller must serialize invocations on the same counter.
 133 */
 134int page_counter_limit(struct page_counter *counter, unsigned long limit)
 135{
 136        for (;;) {
 137                unsigned long old;
 138                long count;
 139
 140                /*
 141                 * Update the limit while making sure that it's not
 142                 * below the concurrently-changing counter value.
 143                 *
 144                 * The xchg implies two full memory barriers before
 145                 * and after, so the read-swap-read is ordered and
 146                 * ensures coherency with page_counter_try_charge():
 147                 * that function modifies the count before checking
 148                 * the limit, so if it sees the old limit, we see the
 149                 * modified counter and retry.
 150                 */
 151                count = atomic_long_read(&counter->count);
 152
 153                if (count > limit)
 154                        return -EBUSY;
 155
 156                old = xchg(&counter->limit, limit);
 157
 158                if (atomic_long_read(&counter->count) <= count)
 159                        return 0;
 160
 161                counter->limit = old;
 162                cond_resched();
 163        }
 164}
 165
 166/**
 167 * page_counter_memparse - memparse() for page counter limits
 168 * @buf: string to parse
 169 * @max: string meaning maximum possible value
 170 * @nr_pages: returns the result in number of pages
 171 *
 172 * Returns -EINVAL, or 0 and @nr_pages on success.  @nr_pages will be
 173 * limited to %PAGE_COUNTER_MAX.
 174 */
 175int page_counter_memparse(const char *buf, const char *max,
 176                          unsigned long *nr_pages)
 177{
 178        char *end;
 179        u64 bytes;
 180
 181        if (!strcmp(buf, max)) {
 182                *nr_pages = PAGE_COUNTER_MAX;
 183                return 0;
 184        }
 185
 186        bytes = memparse(buf, &end);
 187        if (*end != '\0')
 188                return -EINVAL;
 189
 190        *nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);
 191
 192        return 0;
 193}
 194